diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,73751 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3744091355829082, + "eval_steps": 250, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.360228389572705e-05, + "grad_norm": 101.65216064453125, + "learning_rate": 5e-05, + "loss": 8.6947, + "num_input_tokens_seen": 66716, + "step": 1 + }, + { + "epoch": 9.360228389572705e-05, + "loss": 8.418586730957031, + "loss_ce": 4.6138997077941895, + "loss_iou": 1.1171875, + "loss_num": 0.3125, + "loss_xval": 3.8125, + "num_input_tokens_seen": 66716, + "step": 1 + }, + { + "epoch": 0.0001872045677914541, + "grad_norm": 96.95561981201172, + "learning_rate": 5e-05, + "loss": 8.4681, + "num_input_tokens_seen": 133808, + "step": 2 + }, + { + "epoch": 0.0001872045677914541, + "loss": 8.591793060302734, + "loss_ce": 4.539058685302734, + "loss_iou": 1.203125, + "loss_num": 0.33203125, + "loss_xval": 4.0625, + "num_input_tokens_seen": 133808, + "step": 2 + }, + { + "epoch": 0.00028080685168718116, + "grad_norm": 98.5390853881836, + "learning_rate": 5e-05, + "loss": 8.838, + "num_input_tokens_seen": 198860, + "step": 3 + }, + { + "epoch": 0.00028080685168718116, + "loss": 8.935352325439453, + "loss_ce": 4.909960746765137, + "loss_iou": 1.21875, + "loss_num": 0.31640625, + "loss_xval": 4.03125, + "num_input_tokens_seen": 198860, + "step": 3 + }, + { + "epoch": 0.0003744091355829082, + "grad_norm": 99.13172912597656, + "learning_rate": 5e-05, + "loss": 8.4022, + "num_input_tokens_seen": 265008, + "step": 4 + }, + { + "epoch": 0.0003744091355829082, + "loss": 8.233530044555664, + "loss_ce": 4.821421146392822, + "loss_iou": 0.99609375, + "loss_num": 0.28515625, + "loss_xval": 3.40625, + "num_input_tokens_seen": 265008, + "step": 4 + }, + { + "epoch": 0.00046801141947863526, + "grad_norm": 94.90145111083984, + "learning_rate": 5e-05, + "loss": 8.5621, + "num_input_tokens_seen": 331540, + "step": 5 + }, + { + "epoch": 0.00046801141947863526, + "loss": 8.46754264831543, + "loss_ce": 4.4636359214782715, + "loss_iou": 1.2265625, + "loss_num": 0.30859375, + "loss_xval": 4.0, + "num_input_tokens_seen": 331540, + "step": 5 + }, + { + "epoch": 0.0005616137033743623, + "grad_norm": 95.53054809570312, + "learning_rate": 5e-05, + "loss": 8.3328, + "num_input_tokens_seen": 397776, + "step": 6 + }, + { + "epoch": 0.0005616137033743623, + "loss": 8.50499153137207, + "loss_ce": 4.422959804534912, + "loss_iou": 1.21875, + "loss_num": 0.328125, + "loss_xval": 4.09375, + "num_input_tokens_seen": 397776, + "step": 6 + }, + { + "epoch": 0.0006552159872700894, + "grad_norm": 90.96040344238281, + "learning_rate": 5e-05, + "loss": 7.9192, + "num_input_tokens_seen": 464376, + "step": 7 + }, + { + "epoch": 0.0006552159872700894, + "loss": 7.958445072174072, + "loss_ce": 3.9564919471740723, + "loss_iou": 1.21875, + "loss_num": 0.314453125, + "loss_xval": 4.0, + "num_input_tokens_seen": 464376, + "step": 7 + }, + { + "epoch": 0.0007488182711658164, + "grad_norm": 86.27174377441406, + "learning_rate": 5e-05, + "loss": 7.4725, + "num_input_tokens_seen": 531144, + "step": 8 + }, + { + "epoch": 0.0007488182711658164, + "loss": 7.436006546020508, + "loss_ce": 3.336397171020508, + "loss_iou": 1.2421875, + "loss_num": 0.322265625, + "loss_xval": 4.09375, + "num_input_tokens_seen": 531144, + "step": 8 + }, + { + "epoch": 0.0008424205550615435, + "grad_norm": 77.49860382080078, + "learning_rate": 5e-05, + "loss": 7.1421, + "num_input_tokens_seen": 597284, + "step": 9 + }, + { + "epoch": 0.0008424205550615435, + "loss": 7.4713544845581055, + "loss_ce": 3.3854169845581055, + "loss_iou": 1.2890625, + "loss_num": 0.30078125, + "loss_xval": 4.09375, + "num_input_tokens_seen": 597284, + "step": 9 + }, + { + "epoch": 0.0009360228389572705, + "grad_norm": 82.7765884399414, + "learning_rate": 5e-05, + "loss": 6.3314, + "num_input_tokens_seen": 663084, + "step": 10 + }, + { + "epoch": 0.0009360228389572705, + "loss": 6.534829139709473, + "loss_ce": 2.8512353897094727, + "loss_iou": 1.1640625, + "loss_num": 0.26953125, + "loss_xval": 3.6875, + "num_input_tokens_seen": 663084, + "step": 10 + }, + { + "epoch": 0.0010296251228529977, + "grad_norm": 41.23231887817383, + "learning_rate": 5e-05, + "loss": 5.5767, + "num_input_tokens_seen": 728692, + "step": 11 + }, + { + "epoch": 0.0010296251228529977, + "loss": 5.544264793395996, + "loss_ce": 1.9759057760238647, + "loss_iou": 1.0703125, + "loss_num": 0.28515625, + "loss_xval": 3.5625, + "num_input_tokens_seen": 728692, + "step": 11 + }, + { + "epoch": 0.0011232274067487246, + "grad_norm": 35.95630645751953, + "learning_rate": 5e-05, + "loss": 5.464, + "num_input_tokens_seen": 795364, + "step": 12 + }, + { + "epoch": 0.0011232274067487246, + "loss": 5.679543972015381, + "loss_ce": 1.7693877220153809, + "loss_iou": 1.2421875, + "loss_num": 0.287109375, + "loss_xval": 3.90625, + "num_input_tokens_seen": 795364, + "step": 12 + }, + { + "epoch": 0.0012168296906444518, + "grad_norm": 30.46725845336914, + "learning_rate": 5e-05, + "loss": 5.1851, + "num_input_tokens_seen": 861160, + "step": 13 + }, + { + "epoch": 0.0012168296906444518, + "loss": 5.258492469787598, + "loss_ce": 1.4420864582061768, + "loss_iou": 1.2109375, + "loss_num": 0.27734375, + "loss_xval": 3.8125, + "num_input_tokens_seen": 861160, + "step": 13 + }, + { + "epoch": 0.0013104319745401787, + "grad_norm": 34.04572296142578, + "learning_rate": 5e-05, + "loss": 4.9258, + "num_input_tokens_seen": 926332, + "step": 14 + }, + { + "epoch": 0.0013104319745401787, + "loss": 4.778914451599121, + "loss_ce": 1.314070463180542, + "loss_iou": 1.109375, + "loss_num": 0.25, + "loss_xval": 3.46875, + "num_input_tokens_seen": 926332, + "step": 14 + }, + { + "epoch": 0.0014040342584359059, + "grad_norm": 32.23663330078125, + "learning_rate": 5e-05, + "loss": 4.3431, + "num_input_tokens_seen": 992124, + "step": 15 + }, + { + "epoch": 0.0014040342584359059, + "loss": 4.308436393737793, + "loss_ce": 0.9920302629470825, + "loss_iou": 1.0859375, + "loss_num": 0.2275390625, + "loss_xval": 3.3125, + "num_input_tokens_seen": 992124, + "step": 15 + }, + { + "epoch": 0.0014976365423316328, + "grad_norm": 19.634584426879883, + "learning_rate": 5e-05, + "loss": 3.8286, + "num_input_tokens_seen": 1057580, + "step": 16 + }, + { + "epoch": 0.0014976365423316328, + "loss": 3.418391704559326, + "loss_ce": 0.7045247554779053, + "loss_iou": 0.83984375, + "loss_num": 0.2060546875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 1057580, + "step": 16 + }, + { + "epoch": 0.00159123882622736, + "grad_norm": 26.426013946533203, + "learning_rate": 5e-05, + "loss": 4.1967, + "num_input_tokens_seen": 1123876, + "step": 17 + }, + { + "epoch": 0.00159123882622736, + "loss": 4.267147064208984, + "loss_ce": 0.5249598622322083, + "loss_iou": 1.25, + "loss_num": 0.248046875, + "loss_xval": 3.75, + "num_input_tokens_seen": 1123876, + "step": 17 + }, + { + "epoch": 0.001684841110123087, + "grad_norm": 36.31227493286133, + "learning_rate": 5e-05, + "loss": 3.6872, + "num_input_tokens_seen": 1190144, + "step": 18 + }, + { + "epoch": 0.001684841110123087, + "loss": 3.7517099380493164, + "loss_ce": 0.32592862844467163, + "loss_iou": 1.1484375, + "loss_num": 0.2255859375, + "loss_xval": 3.421875, + "num_input_tokens_seen": 1190144, + "step": 18 + }, + { + "epoch": 0.001778443394018814, + "grad_norm": 20.471094131469727, + "learning_rate": 5e-05, + "loss": 3.2612, + "num_input_tokens_seen": 1255520, + "step": 19 + }, + { + "epoch": 0.001778443394018814, + "loss": 3.0514767169952393, + "loss_ce": 0.12569549679756165, + "loss_iou": 0.859375, + "loss_num": 0.2421875, + "loss_xval": 2.921875, + "num_input_tokens_seen": 1255520, + "step": 19 + }, + { + "epoch": 0.001872045677914541, + "grad_norm": 13.815719604492188, + "learning_rate": 5e-05, + "loss": 3.565, + "num_input_tokens_seen": 1322952, + "step": 20 + }, + { + "epoch": 0.001872045677914541, + "loss": 3.4919581413269043, + "loss_ce": 0.06422379612922668, + "loss_iou": 1.1328125, + "loss_num": 0.2314453125, + "loss_xval": 3.421875, + "num_input_tokens_seen": 1322952, + "step": 20 + }, + { + "epoch": 0.001965647961810268, + "grad_norm": 28.042123794555664, + "learning_rate": 5e-05, + "loss": 3.384, + "num_input_tokens_seen": 1389420, + "step": 21 + }, + { + "epoch": 0.001965647961810268, + "loss": 3.3590993881225586, + "loss_ce": 0.04464637115597725, + "loss_iou": 1.109375, + "loss_num": 0.220703125, + "loss_xval": 3.3125, + "num_input_tokens_seen": 1389420, + "step": 21 + }, + { + "epoch": 0.0020592502457059954, + "grad_norm": 14.250997543334961, + "learning_rate": 5e-05, + "loss": 3.408, + "num_input_tokens_seen": 1455232, + "step": 22 + }, + { + "epoch": 0.0020592502457059954, + "loss": 3.383174180984497, + "loss_ce": 0.05114297196269035, + "loss_iou": 1.140625, + "loss_num": 0.2119140625, + "loss_xval": 3.328125, + "num_input_tokens_seen": 1455232, + "step": 22 + }, + { + "epoch": 0.002152852529601722, + "grad_norm": 11.689615249633789, + "learning_rate": 5e-05, + "loss": 3.3596, + "num_input_tokens_seen": 1521676, + "step": 23 + }, + { + "epoch": 0.002152852529601722, + "loss": 3.3575775623321533, + "loss_ce": 0.043124400079250336, + "loss_iou": 1.1484375, + "loss_num": 0.205078125, + "loss_xval": 3.3125, + "num_input_tokens_seen": 1521676, + "step": 23 + }, + { + "epoch": 0.0022464548134974493, + "grad_norm": 16.82274627685547, + "learning_rate": 5e-05, + "loss": 3.3032, + "num_input_tokens_seen": 1587692, + "step": 24 + }, + { + "epoch": 0.0022464548134974493, + "loss": 3.3633084297180176, + "loss_ce": 0.023464728146791458, + "loss_iou": 1.140625, + "loss_num": 0.212890625, + "loss_xval": 3.34375, + "num_input_tokens_seen": 1587692, + "step": 24 + }, + { + "epoch": 0.0023400570973931764, + "grad_norm": 60.981964111328125, + "learning_rate": 5e-05, + "loss": 3.1801, + "num_input_tokens_seen": 1652968, + "step": 25 + }, + { + "epoch": 0.0023400570973931764, + "loss": 3.167349100112915, + "loss_ce": 0.0384429395198822, + "loss_iou": 1.0625, + "loss_num": 0.203125, + "loss_xval": 3.125, + "num_input_tokens_seen": 1652968, + "step": 25 + }, + { + "epoch": 0.0024336593812889036, + "grad_norm": 45.581478118896484, + "learning_rate": 5e-05, + "loss": 3.6619, + "num_input_tokens_seen": 1719392, + "step": 26 + }, + { + "epoch": 0.0024336593812889036, + "loss": 3.7349956035614014, + "loss_ce": 0.04163616895675659, + "loss_iou": 1.203125, + "loss_num": 0.259765625, + "loss_xval": 3.6875, + "num_input_tokens_seen": 1719392, + "step": 26 + }, + { + "epoch": 0.0025272616651846303, + "grad_norm": 28.024511337280273, + "learning_rate": 5e-05, + "loss": 3.3445, + "num_input_tokens_seen": 1785800, + "step": 27 + }, + { + "epoch": 0.0025272616651846303, + "loss": 3.184875965118408, + "loss_ce": 0.04132130742073059, + "loss_iou": 0.9609375, + "loss_num": 0.2451171875, + "loss_xval": 3.140625, + "num_input_tokens_seen": 1785800, + "step": 27 + }, + { + "epoch": 0.0026208639490803575, + "grad_norm": 13.812726974487305, + "learning_rate": 5e-05, + "loss": 3.0071, + "num_input_tokens_seen": 1851224, + "step": 28 + }, + { + "epoch": 0.0026208639490803575, + "loss": 3.247553586959839, + "loss_ce": 0.03075670264661312, + "loss_iou": 1.0703125, + "loss_num": 0.21484375, + "loss_xval": 3.21875, + "num_input_tokens_seen": 1851224, + "step": 28 + }, + { + "epoch": 0.0027144662329760846, + "grad_norm": 56.087398529052734, + "learning_rate": 5e-05, + "loss": 3.1415, + "num_input_tokens_seen": 1917632, + "step": 29 + }, + { + "epoch": 0.0027144662329760846, + "loss": 3.1579842567443848, + "loss_ce": 0.02517169900238514, + "loss_iou": 1.0546875, + "loss_num": 0.2060546875, + "loss_xval": 3.125, + "num_input_tokens_seen": 1917632, + "step": 29 + }, + { + "epoch": 0.0028080685168718118, + "grad_norm": 44.8372688293457, + "learning_rate": 5e-05, + "loss": 3.4511, + "num_input_tokens_seen": 1984736, + "step": 30 + }, + { + "epoch": 0.0028080685168718118, + "loss": 3.401815414428711, + "loss_ce": 0.02876855805516243, + "loss_iou": 1.1328125, + "loss_num": 0.2216796875, + "loss_xval": 3.375, + "num_input_tokens_seen": 1984736, + "step": 30 + }, + { + "epoch": 0.002901670800767539, + "grad_norm": 33.350791931152344, + "learning_rate": 5e-05, + "loss": 3.3317, + "num_input_tokens_seen": 2051392, + "step": 31 + }, + { + "epoch": 0.002901670800767539, + "loss": 3.283022403717041, + "loss_ce": 0.021303577348589897, + "loss_iou": 1.125, + "loss_num": 0.2021484375, + "loss_xval": 3.265625, + "num_input_tokens_seen": 2051392, + "step": 31 + }, + { + "epoch": 0.0029952730846632657, + "grad_norm": 17.58970069885254, + "learning_rate": 5e-05, + "loss": 2.9831, + "num_input_tokens_seen": 2117932, + "step": 32 + }, + { + "epoch": 0.0029952730846632657, + "loss": 2.834751605987549, + "loss_ce": 0.030063960701227188, + "loss_iou": 0.9140625, + "loss_num": 0.1943359375, + "loss_xval": 2.8125, + "num_input_tokens_seen": 2117932, + "step": 32 + }, + { + "epoch": 0.003088875368558993, + "grad_norm": 24.616716384887695, + "learning_rate": 5e-05, + "loss": 3.0216, + "num_input_tokens_seen": 2184880, + "step": 33 + }, + { + "epoch": 0.003088875368558993, + "loss": 3.0619702339172363, + "loss_ce": 0.01900148205459118, + "loss_iou": 1.046875, + "loss_num": 0.1884765625, + "loss_xval": 3.046875, + "num_input_tokens_seen": 2184880, + "step": 33 + }, + { + "epoch": 0.00318247765245472, + "grad_norm": 86.11531066894531, + "learning_rate": 5e-05, + "loss": 3.2853, + "num_input_tokens_seen": 2251368, + "step": 34 + }, + { + "epoch": 0.00318247765245472, + "loss": 3.290015697479248, + "loss_ce": 0.020484520122408867, + "loss_iou": 1.1953125, + "loss_num": 0.1767578125, + "loss_xval": 3.265625, + "num_input_tokens_seen": 2251368, + "step": 34 + }, + { + "epoch": 0.003276079936350447, + "grad_norm": 28.53223419189453, + "learning_rate": 5e-05, + "loss": 3.7949, + "num_input_tokens_seen": 2318268, + "step": 35 + }, + { + "epoch": 0.003276079936350447, + "loss": 3.716911792755127, + "loss_ce": 0.017692841589450836, + "loss_iou": 1.1953125, + "loss_num": 0.259765625, + "loss_xval": 3.703125, + "num_input_tokens_seen": 2318268, + "step": 35 + }, + { + "epoch": 0.003369682220246174, + "grad_norm": 18.319623947143555, + "learning_rate": 5e-05, + "loss": 3.6345, + "num_input_tokens_seen": 2385824, + "step": 36 + }, + { + "epoch": 0.003369682220246174, + "loss": 3.726459503173828, + "loss_ce": 0.019428269937634468, + "loss_iou": 1.1796875, + "loss_num": 0.26953125, + "loss_xval": 3.703125, + "num_input_tokens_seen": 2385824, + "step": 36 + }, + { + "epoch": 0.003463284504141901, + "grad_norm": 16.41934585571289, + "learning_rate": 5e-05, + "loss": 3.4501, + "num_input_tokens_seen": 2452780, + "step": 37 + }, + { + "epoch": 0.003463284504141901, + "loss": 3.402754545211792, + "loss_ce": 0.021895283833146095, + "loss_iou": 1.1171875, + "loss_num": 0.23046875, + "loss_xval": 3.375, + "num_input_tokens_seen": 2452780, + "step": 37 + }, + { + "epoch": 0.003556886788037628, + "grad_norm": 14.25910472869873, + "learning_rate": 5e-05, + "loss": 3.3161, + "num_input_tokens_seen": 2519612, + "step": 38 + }, + { + "epoch": 0.003556886788037628, + "loss": 3.2734827995300293, + "loss_ce": 0.015670407563447952, + "loss_iou": 1.0859375, + "loss_num": 0.2177734375, + "loss_xval": 3.25, + "num_input_tokens_seen": 2519612, + "step": 38 + }, + { + "epoch": 0.0036504890719333554, + "grad_norm": 15.347772598266602, + "learning_rate": 5e-05, + "loss": 3.1594, + "num_input_tokens_seen": 2585796, + "step": 39 + }, + { + "epoch": 0.0036504890719333554, + "loss": 3.0453782081604004, + "loss_ce": 0.01608125865459442, + "loss_iou": 1.03125, + "loss_num": 0.193359375, + "loss_xval": 3.03125, + "num_input_tokens_seen": 2585796, + "step": 39 + }, + { + "epoch": 0.003744091355829082, + "grad_norm": 19.047100067138672, + "learning_rate": 5e-05, + "loss": 3.2023, + "num_input_tokens_seen": 2652228, + "step": 40 + }, + { + "epoch": 0.003744091355829082, + "loss": 3.250826835632324, + "loss_ce": 0.022311009466648102, + "loss_iou": 1.078125, + "loss_num": 0.2158203125, + "loss_xval": 3.234375, + "num_input_tokens_seen": 2652228, + "step": 40 + }, + { + "epoch": 0.0038376936397248092, + "grad_norm": 34.57749557495117, + "learning_rate": 5e-05, + "loss": 3.1466, + "num_input_tokens_seen": 2718992, + "step": 41 + }, + { + "epoch": 0.0038376936397248092, + "loss": 3.1191940307617188, + "loss_ce": 0.013725237920880318, + "loss_iou": 1.1328125, + "loss_num": 0.1669921875, + "loss_xval": 3.109375, + "num_input_tokens_seen": 2718992, + "step": 41 + }, + { + "epoch": 0.003931295923620536, + "grad_norm": 35.58552551269531, + "learning_rate": 5e-05, + "loss": 3.2487, + "num_input_tokens_seen": 2786188, + "step": 42 + }, + { + "epoch": 0.003931295923620536, + "loss": 3.334336757659912, + "loss_ce": 0.01793038472533226, + "loss_iou": 1.125, + "loss_num": 0.2119140625, + "loss_xval": 3.3125, + "num_input_tokens_seen": 2786188, + "step": 42 + }, + { + "epoch": 0.0040248982075162636, + "grad_norm": 21.095924377441406, + "learning_rate": 5e-05, + "loss": 2.8285, + "num_input_tokens_seen": 2851692, + "step": 43 + }, + { + "epoch": 0.0040248982075162636, + "loss": 3.0042130947113037, + "loss_ce": 0.0198379959911108, + "loss_iou": 1.078125, + "loss_num": 0.1640625, + "loss_xval": 2.984375, + "num_input_tokens_seen": 2851692, + "step": 43 + }, + { + "epoch": 0.004118500491411991, + "grad_norm": 35.09490966796875, + "learning_rate": 5e-05, + "loss": 3.01, + "num_input_tokens_seen": 2916516, + "step": 44 + }, + { + "epoch": 0.004118500491411991, + "loss": 2.9932496547698975, + "loss_ce": 0.020593497902154922, + "loss_iou": 1.046875, + "loss_num": 0.17578125, + "loss_xval": 2.96875, + "num_input_tokens_seen": 2916516, + "step": 44 + }, + { + "epoch": 0.004212102775307718, + "grad_norm": 29.586074829101562, + "learning_rate": 5e-05, + "loss": 3.2593, + "num_input_tokens_seen": 2982764, + "step": 45 + }, + { + "epoch": 0.004212102775307718, + "loss": 3.2288575172424316, + "loss_ce": 0.01401369459927082, + "loss_iou": 1.171875, + "loss_num": 0.1728515625, + "loss_xval": 3.21875, + "num_input_tokens_seen": 2982764, + "step": 45 + }, + { + "epoch": 0.004305705059203444, + "grad_norm": 20.80161476135254, + "learning_rate": 5e-05, + "loss": 2.7546, + "num_input_tokens_seen": 3049572, + "step": 46 + }, + { + "epoch": 0.004305705059203444, + "loss": 2.786905288696289, + "loss_ce": 0.02225703001022339, + "loss_iou": 0.92578125, + "loss_num": 0.1826171875, + "loss_xval": 2.765625, + "num_input_tokens_seen": 3049572, + "step": 46 + }, + { + "epoch": 0.004399307343099171, + "grad_norm": 48.15776443481445, + "learning_rate": 5e-05, + "loss": 2.8875, + "num_input_tokens_seen": 3114936, + "step": 47 + }, + { + "epoch": 0.004399307343099171, + "loss": 2.9008116722106934, + "loss_ce": 0.02434687502682209, + "loss_iou": 1.0234375, + "loss_num": 0.166015625, + "loss_xval": 2.875, + "num_input_tokens_seen": 3114936, + "step": 47 + }, + { + "epoch": 0.0044929096269948985, + "grad_norm": 27.882295608520508, + "learning_rate": 5e-05, + "loss": 3.3689, + "num_input_tokens_seen": 3182172, + "step": 48 + }, + { + "epoch": 0.0044929096269948985, + "loss": 3.354379653930664, + "loss_ce": 0.022348247468471527, + "loss_iou": 1.109375, + "loss_num": 0.22265625, + "loss_xval": 3.328125, + "num_input_tokens_seen": 3182172, + "step": 48 + }, + { + "epoch": 0.004586511910890626, + "grad_norm": 33.86907958984375, + "learning_rate": 5e-05, + "loss": 3.1029, + "num_input_tokens_seen": 3248160, + "step": 49 + }, + { + "epoch": 0.004586511910890626, + "loss": 3.132784605026245, + "loss_ce": 0.025362728163599968, + "loss_iou": 1.1015625, + "loss_num": 0.1796875, + "loss_xval": 3.109375, + "num_input_tokens_seen": 3248160, + "step": 49 + }, + { + "epoch": 0.004680114194786353, + "grad_norm": 20.456836700439453, + "learning_rate": 5e-05, + "loss": 2.7285, + "num_input_tokens_seen": 3313712, + "step": 50 + }, + { + "epoch": 0.004680114194786353, + "loss": 3.049952507019043, + "loss_ce": 0.01284329779446125, + "loss_iou": 1.0234375, + "loss_num": 0.1962890625, + "loss_xval": 3.03125, + "num_input_tokens_seen": 3313712, + "step": 50 + }, + { + "epoch": 0.00477371647868208, + "grad_norm": 35.79520797729492, + "learning_rate": 5e-05, + "loss": 2.8448, + "num_input_tokens_seen": 3379916, + "step": 51 + }, + { + "epoch": 0.00477371647868208, + "loss": 3.0145621299743652, + "loss_ce": 0.014562256634235382, + "loss_iou": 1.1015625, + "loss_num": 0.1591796875, + "loss_xval": 3.0, + "num_input_tokens_seen": 3379916, + "step": 51 + }, + { + "epoch": 0.004867318762577807, + "grad_norm": 15.867273330688477, + "learning_rate": 5e-05, + "loss": 2.815, + "num_input_tokens_seen": 3445848, + "step": 52 + }, + { + "epoch": 0.004867318762577807, + "loss": 2.692965507507324, + "loss_ce": 0.009859994053840637, + "loss_iou": 0.8984375, + "loss_num": 0.177734375, + "loss_xval": 2.6875, + "num_input_tokens_seen": 3445848, + "step": 52 + }, + { + "epoch": 0.004960921046473534, + "grad_norm": 16.5876407623291, + "learning_rate": 5e-05, + "loss": 2.8533, + "num_input_tokens_seen": 3512724, + "step": 53 + }, + { + "epoch": 0.004960921046473534, + "loss": 2.8537731170654297, + "loss_ce": 0.019788645207881927, + "loss_iou": 1.0546875, + "loss_num": 0.14453125, + "loss_xval": 2.828125, + "num_input_tokens_seen": 3512724, + "step": 53 + }, + { + "epoch": 0.005054523330369261, + "grad_norm": 38.60595703125, + "learning_rate": 5e-05, + "loss": 2.6611, + "num_input_tokens_seen": 3578592, + "step": 54 + }, + { + "epoch": 0.005054523330369261, + "loss": 2.607724189758301, + "loss_ce": 0.008114909753203392, + "loss_iou": 0.95703125, + "loss_num": 0.1376953125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 3578592, + "step": 54 + }, + { + "epoch": 0.005148125614264988, + "grad_norm": 52.92810821533203, + "learning_rate": 5e-05, + "loss": 2.9911, + "num_input_tokens_seen": 3644532, + "step": 55 + }, + { + "epoch": 0.005148125614264988, + "loss": 2.998157501220703, + "loss_ce": 0.009876357391476631, + "loss_iou": 1.15625, + "loss_num": 0.1357421875, + "loss_xval": 2.984375, + "num_input_tokens_seen": 3644532, + "step": 55 + }, + { + "epoch": 0.005241727898160715, + "grad_norm": 29.84058952331543, + "learning_rate": 5e-05, + "loss": 2.8044, + "num_input_tokens_seen": 3711204, + "step": 56 + }, + { + "epoch": 0.005241727898160715, + "loss": 2.824930191040039, + "loss_ce": 0.008523866534233093, + "loss_iou": 1.046875, + "loss_num": 0.1435546875, + "loss_xval": 2.8125, + "num_input_tokens_seen": 3711204, + "step": 56 + }, + { + "epoch": 0.005335330182056442, + "grad_norm": 17.782625198364258, + "learning_rate": 5e-05, + "loss": 2.7295, + "num_input_tokens_seen": 3777152, + "step": 57 + }, + { + "epoch": 0.005335330182056442, + "loss": 2.6248414516448975, + "loss_ce": 0.007653830572962761, + "loss_iou": 1.0, + "loss_num": 0.1240234375, + "loss_xval": 2.625, + "num_input_tokens_seen": 3777152, + "step": 57 + }, + { + "epoch": 0.005428932465952169, + "grad_norm": 15.291202545166016, + "learning_rate": 5e-05, + "loss": 2.6158, + "num_input_tokens_seen": 3841644, + "step": 58 + }, + { + "epoch": 0.005428932465952169, + "loss": 2.5831832885742188, + "loss_ce": 0.008964783512055874, + "loss_iou": 0.99609375, + "loss_num": 0.11669921875, + "loss_xval": 2.578125, + "num_input_tokens_seen": 3841644, + "step": 58 + }, + { + "epoch": 0.005522534749847896, + "grad_norm": 62.47341537475586, + "learning_rate": 5e-05, + "loss": 2.5464, + "num_input_tokens_seen": 3907636, + "step": 59 + }, + { + "epoch": 0.005522534749847896, + "loss": 2.5465049743652344, + "loss_ce": 0.005489123519510031, + "loss_iou": 1.0390625, + "loss_num": 0.09375, + "loss_xval": 2.546875, + "num_input_tokens_seen": 3907636, + "step": 59 + }, + { + "epoch": 0.0056161370337436236, + "grad_norm": 19.503198623657227, + "learning_rate": 5e-05, + "loss": 2.9708, + "num_input_tokens_seen": 3973224, + "step": 60 + }, + { + "epoch": 0.0056161370337436236, + "loss": 2.744785785675049, + "loss_ce": 0.015293493866920471, + "loss_iou": 1.0, + "loss_num": 0.1455078125, + "loss_xval": 2.734375, + "num_input_tokens_seen": 3973224, + "step": 60 + }, + { + "epoch": 0.005709739317639351, + "grad_norm": 26.515504837036133, + "learning_rate": 5e-05, + "loss": 3.1155, + "num_input_tokens_seen": 4040240, + "step": 61 + }, + { + "epoch": 0.005709739317639351, + "loss": 3.1289381980895996, + "loss_ce": 0.009797676466405392, + "loss_iou": 1.1796875, + "loss_num": 0.15234375, + "loss_xval": 3.125, + "num_input_tokens_seen": 4040240, + "step": 61 + }, + { + "epoch": 0.005803341601535078, + "grad_norm": 16.495893478393555, + "learning_rate": 5e-05, + "loss": 2.9449, + "num_input_tokens_seen": 4106088, + "step": 62 + }, + { + "epoch": 0.005803341601535078, + "loss": 3.079580307006836, + "loss_ce": 0.011220941320061684, + "loss_iou": 1.109375, + "loss_num": 0.171875, + "loss_xval": 3.0625, + "num_input_tokens_seen": 4106088, + "step": 62 + }, + { + "epoch": 0.005896943885430804, + "grad_norm": 19.985366821289062, + "learning_rate": 5e-05, + "loss": 2.9913, + "num_input_tokens_seen": 4172704, + "step": 63 + }, + { + "epoch": 0.005896943885430804, + "loss": 2.873556613922119, + "loss_ce": 0.010275539010763168, + "loss_iou": 1.0625, + "loss_num": 0.146484375, + "loss_xval": 2.859375, + "num_input_tokens_seen": 4172704, + "step": 63 + }, + { + "epoch": 0.005990546169326531, + "grad_norm": 19.245983123779297, + "learning_rate": 5e-05, + "loss": 2.6785, + "num_input_tokens_seen": 4239912, + "step": 64 + }, + { + "epoch": 0.005990546169326531, + "loss": 2.6892271041870117, + "loss_ce": 0.007586401421576738, + "loss_iou": 1.015625, + "loss_num": 0.130859375, + "loss_xval": 2.6875, + "num_input_tokens_seen": 4239912, + "step": 64 + }, + { + "epoch": 0.0060841484532222585, + "grad_norm": 14.93860149383545, + "learning_rate": 5e-05, + "loss": 2.3807, + "num_input_tokens_seen": 4305944, + "step": 65 + }, + { + "epoch": 0.0060841484532222585, + "loss": 2.3514838218688965, + "loss_ce": 0.006269071251153946, + "loss_iou": 0.92578125, + "loss_num": 0.09814453125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 4305944, + "step": 65 + }, + { + "epoch": 0.006177750737117986, + "grad_norm": 24.663524627685547, + "learning_rate": 5e-05, + "loss": 2.6203, + "num_input_tokens_seen": 4372744, + "step": 66 + }, + { + "epoch": 0.006177750737117986, + "loss": 2.5763978958129883, + "loss_ce": 0.008038531057536602, + "loss_iou": 1.0390625, + "loss_num": 0.09716796875, + "loss_xval": 2.5625, + "num_input_tokens_seen": 4372744, + "step": 66 + }, + { + "epoch": 0.006271353021013713, + "grad_norm": 38.61247253417969, + "learning_rate": 5e-05, + "loss": 2.8844, + "num_input_tokens_seen": 4439252, + "step": 67 + }, + { + "epoch": 0.006271353021013713, + "loss": 3.074906826019287, + "loss_ce": 0.012406734749674797, + "loss_iou": 1.234375, + "loss_num": 0.1181640625, + "loss_xval": 3.0625, + "num_input_tokens_seen": 4439252, + "step": 67 + }, + { + "epoch": 0.00636495530490944, + "grad_norm": 36.13186264038086, + "learning_rate": 5e-05, + "loss": 2.6886, + "num_input_tokens_seen": 4505692, + "step": 68 + }, + { + "epoch": 0.00636495530490944, + "loss": 2.7200677394866943, + "loss_ce": 0.009130253456532955, + "loss_iou": 1.0546875, + "loss_num": 0.1201171875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 4505692, + "step": 68 + }, + { + "epoch": 0.006458557588805167, + "grad_norm": 17.341550827026367, + "learning_rate": 5e-05, + "loss": 2.6977, + "num_input_tokens_seen": 4571836, + "step": 69 + }, + { + "epoch": 0.006458557588805167, + "loss": 2.6547369956970215, + "loss_ce": 0.015088449232280254, + "loss_iou": 0.95703125, + "loss_num": 0.1455078125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 4571836, + "step": 69 + }, + { + "epoch": 0.006552159872700894, + "grad_norm": 11.432044982910156, + "learning_rate": 5e-05, + "loss": 2.5908, + "num_input_tokens_seen": 4638160, + "step": 70 + }, + { + "epoch": 0.006552159872700894, + "loss": 2.603581428527832, + "loss_ce": 0.003971965983510017, + "loss_iou": 0.99609375, + "loss_num": 0.1220703125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 4638160, + "step": 70 + }, + { + "epoch": 0.006645762156596621, + "grad_norm": 12.256217002868652, + "learning_rate": 5e-05, + "loss": 2.2877, + "num_input_tokens_seen": 4703816, + "step": 71 + }, + { + "epoch": 0.006645762156596621, + "loss": 2.4855265617370605, + "loss_ce": 0.010917097330093384, + "loss_iou": 0.9765625, + "loss_num": 0.10498046875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 4703816, + "step": 71 + }, + { + "epoch": 0.006739364440492348, + "grad_norm": 86.1080551147461, + "learning_rate": 5e-05, + "loss": 2.6883, + "num_input_tokens_seen": 4770108, + "step": 72 + }, + { + "epoch": 0.006739364440492348, + "loss": 2.6870758533477783, + "loss_ce": 0.009341409429907799, + "loss_iou": 1.1171875, + "loss_num": 0.08935546875, + "loss_xval": 2.671875, + "num_input_tokens_seen": 4770108, + "step": 72 + }, + { + "epoch": 0.006832966724388075, + "grad_norm": 21.067468643188477, + "learning_rate": 5e-05, + "loss": 2.9645, + "num_input_tokens_seen": 4836600, + "step": 73 + }, + { + "epoch": 0.006832966724388075, + "loss": 2.9787683486938477, + "loss_ce": 0.019783899188041687, + "loss_iou": 1.0859375, + "loss_num": 0.15625, + "loss_xval": 2.953125, + "num_input_tokens_seen": 4836600, + "step": 73 + }, + { + "epoch": 0.006926569008283802, + "grad_norm": 25.35846710205078, + "learning_rate": 5e-05, + "loss": 2.9524, + "num_input_tokens_seen": 4904020, + "step": 74 + }, + { + "epoch": 0.006926569008283802, + "loss": 3.219608783721924, + "loss_ce": 0.05554642528295517, + "loss_iou": 1.1171875, + "loss_num": 0.1875, + "loss_xval": 3.15625, + "num_input_tokens_seen": 4904020, + "step": 74 + }, + { + "epoch": 0.007020171292179529, + "grad_norm": 12.379012107849121, + "learning_rate": 5e-05, + "loss": 2.727, + "num_input_tokens_seen": 4969560, + "step": 75 + }, + { + "epoch": 0.007020171292179529, + "loss": 2.524005889892578, + "loss_ce": 0.008869229815900326, + "loss_iou": 0.9140625, + "loss_num": 0.13671875, + "loss_xval": 2.515625, + "num_input_tokens_seen": 4969560, + "step": 75 + }, + { + "epoch": 0.007113773576075256, + "grad_norm": 20.057796478271484, + "learning_rate": 5e-05, + "loss": 2.896, + "num_input_tokens_seen": 5037228, + "step": 76 + }, + { + "epoch": 0.007113773576075256, + "loss": 2.935084819793701, + "loss_ce": 0.007350385654717684, + "loss_iou": 1.09375, + "loss_num": 0.1494140625, + "loss_xval": 2.921875, + "num_input_tokens_seen": 5037228, + "step": 76 + }, + { + "epoch": 0.0072073758599709836, + "grad_norm": 13.165987014770508, + "learning_rate": 5e-05, + "loss": 2.9167, + "num_input_tokens_seen": 5103536, + "step": 77 + }, + { + "epoch": 0.0072073758599709836, + "loss": 3.1115217208862305, + "loss_ce": 0.00605290150269866, + "loss_iou": 1.125, + "loss_num": 0.1708984375, + "loss_xval": 3.109375, + "num_input_tokens_seen": 5103536, + "step": 77 + }, + { + "epoch": 0.007300978143866711, + "grad_norm": 15.973751068115234, + "learning_rate": 5e-05, + "loss": 2.8458, + "num_input_tokens_seen": 5170608, + "step": 78 + }, + { + "epoch": 0.007300978143866711, + "loss": 2.7468762397766113, + "loss_ce": 0.006641830783337355, + "loss_iou": 1.0234375, + "loss_num": 0.13671875, + "loss_xval": 2.734375, + "num_input_tokens_seen": 5170608, + "step": 78 + }, + { + "epoch": 0.007394580427762437, + "grad_norm": 15.901095390319824, + "learning_rate": 5e-05, + "loss": 2.8575, + "num_input_tokens_seen": 5235860, + "step": 79 + }, + { + "epoch": 0.007394580427762437, + "loss": 2.915633201599121, + "loss_ce": 0.011336389929056168, + "loss_iou": 1.109375, + "loss_num": 0.13671875, + "loss_xval": 2.90625, + "num_input_tokens_seen": 5235860, + "step": 79 + }, + { + "epoch": 0.007488182711658164, + "grad_norm": 12.038989067077637, + "learning_rate": 5e-05, + "loss": 2.8296, + "num_input_tokens_seen": 5302656, + "step": 80 + }, + { + "epoch": 0.007488182711658164, + "loss": 2.6915206909179688, + "loss_ce": 0.013786448165774345, + "loss_iou": 0.96484375, + "loss_num": 0.1484375, + "loss_xval": 2.671875, + "num_input_tokens_seen": 5302656, + "step": 80 + }, + { + "epoch": 0.007581784995553891, + "grad_norm": 11.164108276367188, + "learning_rate": 5e-05, + "loss": 2.3217, + "num_input_tokens_seen": 5368124, + "step": 81 + }, + { + "epoch": 0.007581784995553891, + "loss": 2.479562997817993, + "loss_ce": 0.003000450786203146, + "loss_iou": 0.9765625, + "loss_num": 0.1044921875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 5368124, + "step": 81 + }, + { + "epoch": 0.0076753872794496185, + "grad_norm": 9.867695808410645, + "learning_rate": 5e-05, + "loss": 2.4559, + "num_input_tokens_seen": 5434644, + "step": 82 + }, + { + "epoch": 0.0076753872794496185, + "loss": 2.4299824237823486, + "loss_ce": 0.00505574606359005, + "loss_iou": 0.96875, + "loss_num": 0.09814453125, + "loss_xval": 2.421875, + "num_input_tokens_seen": 5434644, + "step": 82 + }, + { + "epoch": 0.007768989563345346, + "grad_norm": 20.0883846282959, + "learning_rate": 5e-05, + "loss": 2.5437, + "num_input_tokens_seen": 5500948, + "step": 83 + }, + { + "epoch": 0.007768989563345346, + "loss": 2.514523983001709, + "loss_ce": 0.010617696680128574, + "loss_iou": 1.0234375, + "loss_num": 0.0908203125, + "loss_xval": 2.5, + "num_input_tokens_seen": 5500948, + "step": 83 + }, + { + "epoch": 0.007862591847241073, + "grad_norm": 27.041717529296875, + "learning_rate": 5e-05, + "loss": 3.0171, + "num_input_tokens_seen": 5566960, + "step": 84 + }, + { + "epoch": 0.007862591847241073, + "loss": 3.0104644298553467, + "loss_ce": 0.020230058580636978, + "loss_iou": 1.21875, + "loss_num": 0.11083984375, + "loss_xval": 2.984375, + "num_input_tokens_seen": 5566960, + "step": 84 + }, + { + "epoch": 0.007956194131136799, + "grad_norm": 35.098602294921875, + "learning_rate": 5e-05, + "loss": 2.7361, + "num_input_tokens_seen": 5633568, + "step": 85 + }, + { + "epoch": 0.007956194131136799, + "loss": 2.7287986278533936, + "loss_ce": 0.004189261235296726, + "loss_iou": 1.140625, + "loss_num": 0.08984375, + "loss_xval": 2.71875, + "num_input_tokens_seen": 5633568, + "step": 85 + }, + { + "epoch": 0.008049796415032527, + "grad_norm": 14.407003402709961, + "learning_rate": 5e-05, + "loss": 2.904, + "num_input_tokens_seen": 5699680, + "step": 86 + }, + { + "epoch": 0.008049796415032527, + "loss": 2.864617109298706, + "loss_ce": 0.010124976746737957, + "loss_iou": 0.98046875, + "loss_num": 0.1796875, + "loss_xval": 2.859375, + "num_input_tokens_seen": 5699680, + "step": 86 + }, + { + "epoch": 0.008143398698928253, + "grad_norm": 10.01558780670166, + "learning_rate": 5e-05, + "loss": 2.5973, + "num_input_tokens_seen": 5765836, + "step": 87 + }, + { + "epoch": 0.008143398698928253, + "loss": 2.4706103801727295, + "loss_ce": 0.0038135177455842495, + "loss_iou": 0.890625, + "loss_num": 0.13671875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 5765836, + "step": 87 + }, + { + "epoch": 0.008237000982823981, + "grad_norm": 31.901260375976562, + "learning_rate": 5e-05, + "loss": 2.6865, + "num_input_tokens_seen": 5832224, + "step": 88 + }, + { + "epoch": 0.008237000982823981, + "loss": 2.5909039974212646, + "loss_ce": 0.0030133752152323723, + "loss_iou": 1.0234375, + "loss_num": 0.10791015625, + "loss_xval": 2.59375, + "num_input_tokens_seen": 5832224, + "step": 88 + }, + { + "epoch": 0.008330603266719708, + "grad_norm": 29.9424991607666, + "learning_rate": 5e-05, + "loss": 2.29, + "num_input_tokens_seen": 5899164, + "step": 89 + }, + { + "epoch": 0.008330603266719708, + "loss": 2.2349627017974854, + "loss_ce": 0.006935363169759512, + "loss_iou": 0.87890625, + "loss_num": 0.09423828125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 5899164, + "step": 89 + }, + { + "epoch": 0.008424205550615436, + "grad_norm": 30.49257469177246, + "learning_rate": 5e-05, + "loss": 2.8297, + "num_input_tokens_seen": 5966128, + "step": 90 + }, + { + "epoch": 0.008424205550615436, + "loss": 2.859210252761841, + "loss_ce": 0.009600731544196606, + "loss_iou": 1.09375, + "loss_num": 0.1328125, + "loss_xval": 2.84375, + "num_input_tokens_seen": 5966128, + "step": 90 + }, + { + "epoch": 0.008517807834511162, + "grad_norm": 31.240354537963867, + "learning_rate": 5e-05, + "loss": 2.7538, + "num_input_tokens_seen": 6032860, + "step": 91 + }, + { + "epoch": 0.008517807834511162, + "loss": 2.7331926822662354, + "loss_ce": 0.012489484623074532, + "loss_iou": 1.078125, + "loss_num": 0.11328125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 6032860, + "step": 91 + }, + { + "epoch": 0.008611410118406888, + "grad_norm": 27.121564865112305, + "learning_rate": 5e-05, + "loss": 2.6359, + "num_input_tokens_seen": 6098444, + "step": 92 + }, + { + "epoch": 0.008611410118406888, + "loss": 2.57448148727417, + "loss_ce": 0.006122007966041565, + "loss_iou": 1.0078125, + "loss_num": 0.1103515625, + "loss_xval": 2.5625, + "num_input_tokens_seen": 6098444, + "step": 92 + }, + { + "epoch": 0.008705012402302616, + "grad_norm": 13.42473316192627, + "learning_rate": 5e-05, + "loss": 2.6054, + "num_input_tokens_seen": 6164956, + "step": 93 + }, + { + "epoch": 0.008705012402302616, + "loss": 2.515028715133667, + "loss_ce": 0.005263021681457758, + "loss_iou": 1.0234375, + "loss_num": 0.09326171875, + "loss_xval": 2.515625, + "num_input_tokens_seen": 6164956, + "step": 93 + }, + { + "epoch": 0.008798614686198343, + "grad_norm": 19.04734230041504, + "learning_rate": 5e-05, + "loss": 2.5692, + "num_input_tokens_seen": 6231608, + "step": 94 + }, + { + "epoch": 0.008798614686198343, + "loss": 2.5756468772888184, + "loss_ce": 0.005334290210157633, + "loss_iou": 1.03125, + "loss_num": 0.10302734375, + "loss_xval": 2.5625, + "num_input_tokens_seen": 6231608, + "step": 94 + }, + { + "epoch": 0.00889221697009407, + "grad_norm": 20.92302894592285, + "learning_rate": 5e-05, + "loss": 2.4289, + "num_input_tokens_seen": 6297636, + "step": 95 + }, + { + "epoch": 0.00889221697009407, + "loss": 2.3900952339172363, + "loss_ce": 0.007282722741365433, + "loss_iou": 0.9765625, + "loss_num": 0.08642578125, + "loss_xval": 2.375, + "num_input_tokens_seen": 6297636, + "step": 95 + }, + { + "epoch": 0.008985819253989797, + "grad_norm": 20.4478759765625, + "learning_rate": 5e-05, + "loss": 2.422, + "num_input_tokens_seen": 6363796, + "step": 96 + }, + { + "epoch": 0.008985819253989797, + "loss": 2.25628662109375, + "loss_ce": 0.010192908346652985, + "loss_iou": 0.8828125, + "loss_num": 0.09716796875, + "loss_xval": 2.25, + "num_input_tokens_seen": 6363796, + "step": 96 + }, + { + "epoch": 0.009079421537885525, + "grad_norm": 14.334249496459961, + "learning_rate": 5e-05, + "loss": 2.3518, + "num_input_tokens_seen": 6429864, + "step": 97 + }, + { + "epoch": 0.009079421537885525, + "loss": 2.1521637439727783, + "loss_ce": 0.0049469019286334515, + "loss_iou": 0.88671875, + "loss_num": 0.0751953125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 6429864, + "step": 97 + }, + { + "epoch": 0.009173023821781251, + "grad_norm": 31.936813354492188, + "learning_rate": 5e-05, + "loss": 2.4487, + "num_input_tokens_seen": 6496220, + "step": 98 + }, + { + "epoch": 0.009173023821781251, + "loss": 2.390045642852783, + "loss_ce": 0.007233068346977234, + "loss_iou": 1.015625, + "loss_num": 0.0693359375, + "loss_xval": 2.375, + "num_input_tokens_seen": 6496220, + "step": 98 + }, + { + "epoch": 0.00926662610567698, + "grad_norm": 17.394086837768555, + "learning_rate": 5e-05, + "loss": 2.8472, + "num_input_tokens_seen": 6562808, + "step": 99 + }, + { + "epoch": 0.00926662610567698, + "loss": 2.81095027923584, + "loss_ce": 0.008215953595936298, + "loss_iou": 1.09375, + "loss_num": 0.125, + "loss_xval": 2.796875, + "num_input_tokens_seen": 6562808, + "step": 99 + }, + { + "epoch": 0.009360228389572706, + "grad_norm": 10.863395690917969, + "learning_rate": 5e-05, + "loss": 2.1969, + "num_input_tokens_seen": 6627500, + "step": 100 + }, + { + "epoch": 0.009360228389572706, + "loss": 2.2767670154571533, + "loss_ce": 0.004672226030379534, + "loss_iou": 0.8984375, + "loss_num": 0.095703125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 6627500, + "step": 100 + }, + { + "epoch": 0.009453830673468432, + "grad_norm": 46.18409729003906, + "learning_rate": 5e-05, + "loss": 2.5992, + "num_input_tokens_seen": 6693964, + "step": 101 + }, + { + "epoch": 0.009453830673468432, + "loss": 2.6916022300720215, + "loss_ce": 0.008008443750441074, + "loss_iou": 1.0546875, + "loss_num": 0.1142578125, + "loss_xval": 2.6875, + "num_input_tokens_seen": 6693964, + "step": 101 + }, + { + "epoch": 0.00954743295736416, + "grad_norm": 17.2650146484375, + "learning_rate": 5e-05, + "loss": 2.5906, + "num_input_tokens_seen": 6761124, + "step": 102 + }, + { + "epoch": 0.00954743295736416, + "loss": 2.5785131454467773, + "loss_ce": 0.004294442944228649, + "loss_iou": 1.09375, + "loss_num": 0.07861328125, + "loss_xval": 2.578125, + "num_input_tokens_seen": 6761124, + "step": 102 + }, + { + "epoch": 0.009641035241259886, + "grad_norm": 11.22831916809082, + "learning_rate": 5e-05, + "loss": 2.3741, + "num_input_tokens_seen": 6826968, + "step": 103 + }, + { + "epoch": 0.009641035241259886, + "loss": 2.296377182006836, + "loss_ce": 0.005361703224480152, + "loss_iou": 0.9296875, + "loss_num": 0.0869140625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 6826968, + "step": 103 + }, + { + "epoch": 0.009734637525155614, + "grad_norm": 35.63290023803711, + "learning_rate": 5e-05, + "loss": 2.5937, + "num_input_tokens_seen": 6893440, + "step": 104 + }, + { + "epoch": 0.009734637525155614, + "loss": 2.4925217628479004, + "loss_ce": 0.005217037629336119, + "loss_iou": 1.0703125, + "loss_num": 0.06884765625, + "loss_xval": 2.484375, + "num_input_tokens_seen": 6893440, + "step": 104 + }, + { + "epoch": 0.00982823980905134, + "grad_norm": 18.57401466369629, + "learning_rate": 5e-05, + "loss": 3.1896, + "num_input_tokens_seen": 6960672, + "step": 105 + }, + { + "epoch": 0.00982823980905134, + "loss": 3.2786853313446045, + "loss_ce": 0.007201008033007383, + "loss_iou": 1.2265625, + "loss_num": 0.1640625, + "loss_xval": 3.265625, + "num_input_tokens_seen": 6960672, + "step": 105 + }, + { + "epoch": 0.009921842092947069, + "grad_norm": 14.300521850585938, + "learning_rate": 5e-05, + "loss": 2.83, + "num_input_tokens_seen": 7026292, + "step": 106 + }, + { + "epoch": 0.009921842092947069, + "loss": 2.820474147796631, + "loss_ce": 0.004067921079695225, + "loss_iou": 1.0390625, + "loss_num": 0.146484375, + "loss_xval": 2.8125, + "num_input_tokens_seen": 7026292, + "step": 106 + }, + { + "epoch": 0.010015444376842795, + "grad_norm": 7.471635818481445, + "learning_rate": 5e-05, + "loss": 2.6847, + "num_input_tokens_seen": 7092416, + "step": 107 + }, + { + "epoch": 0.010015444376842795, + "loss": 2.739936113357544, + "loss_ce": 0.003608077298849821, + "loss_iou": 1.0390625, + "loss_num": 0.1328125, + "loss_xval": 2.734375, + "num_input_tokens_seen": 7092416, + "step": 107 + }, + { + "epoch": 0.010109046660738521, + "grad_norm": 18.97633934020996, + "learning_rate": 5e-05, + "loss": 2.5772, + "num_input_tokens_seen": 7158496, + "step": 108 + }, + { + "epoch": 0.010109046660738521, + "loss": 2.6946582794189453, + "loss_ce": 0.00520497839897871, + "loss_iou": 1.0546875, + "loss_num": 0.11474609375, + "loss_xval": 2.6875, + "num_input_tokens_seen": 7158496, + "step": 108 + }, + { + "epoch": 0.01020264894463425, + "grad_norm": 7.846622467041016, + "learning_rate": 5e-05, + "loss": 2.2517, + "num_input_tokens_seen": 7224048, + "step": 109 + }, + { + "epoch": 0.01020264894463425, + "loss": 2.1874606609344482, + "loss_ce": 0.003378791268914938, + "loss_iou": 0.859375, + "loss_num": 0.09228515625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 7224048, + "step": 109 + }, + { + "epoch": 0.010296251228529976, + "grad_norm": 6.304441928863525, + "learning_rate": 5e-05, + "loss": 2.4298, + "num_input_tokens_seen": 7289960, + "step": 110 + }, + { + "epoch": 0.010296251228529976, + "loss": 2.305495262145996, + "loss_ce": 0.011550042778253555, + "loss_iou": 0.85546875, + "loss_num": 0.1162109375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 7289960, + "step": 110 + }, + { + "epoch": 0.010389853512425704, + "grad_norm": 7.191917419433594, + "learning_rate": 5e-05, + "loss": 2.066, + "num_input_tokens_seen": 7355564, + "step": 111 + }, + { + "epoch": 0.010389853512425704, + "loss": 1.8617236614227295, + "loss_ce": 0.01125979796051979, + "loss_iou": 0.734375, + "loss_num": 0.0751953125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 7355564, + "step": 111 + }, + { + "epoch": 0.01048345579632143, + "grad_norm": 20.665714263916016, + "learning_rate": 5e-05, + "loss": 2.3708, + "num_input_tokens_seen": 7421660, + "step": 112 + }, + { + "epoch": 0.01048345579632143, + "loss": 2.4235143661499023, + "loss_ce": 0.014334505423903465, + "loss_iou": 0.98828125, + "loss_num": 0.0859375, + "loss_xval": 2.40625, + "num_input_tokens_seen": 7421660, + "step": 112 + }, + { + "epoch": 0.010577058080217158, + "grad_norm": 15.690848350524902, + "learning_rate": 5e-05, + "loss": 2.3252, + "num_input_tokens_seen": 7488084, + "step": 113 + }, + { + "epoch": 0.010577058080217158, + "loss": 2.275463581085205, + "loss_ce": 0.0030024414882063866, + "loss_iou": 0.9609375, + "loss_num": 0.0703125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 7488084, + "step": 113 + }, + { + "epoch": 0.010670660364112884, + "grad_norm": 42.13125991821289, + "learning_rate": 5e-05, + "loss": 2.6392, + "num_input_tokens_seen": 7555320, + "step": 114 + }, + { + "epoch": 0.010670660364112884, + "loss": 2.6008431911468506, + "loss_ce": 0.007093184161931276, + "loss_iou": 1.140625, + "loss_num": 0.06396484375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 7555320, + "step": 114 + }, + { + "epoch": 0.010764262648008612, + "grad_norm": 11.553654670715332, + "learning_rate": 5e-05, + "loss": 2.9421, + "num_input_tokens_seen": 7622696, + "step": 115 + }, + { + "epoch": 0.010764262648008612, + "loss": 3.0041770935058594, + "loss_ce": 0.006130222696810961, + "loss_iou": 1.15625, + "loss_num": 0.13671875, + "loss_xval": 3.0, + "num_input_tokens_seen": 7622696, + "step": 115 + }, + { + "epoch": 0.010857864931904338, + "grad_norm": 30.39330291748047, + "learning_rate": 5e-05, + "loss": 2.9718, + "num_input_tokens_seen": 7689676, + "step": 116 + }, + { + "epoch": 0.010857864931904338, + "loss": 3.096498966217041, + "loss_ce": 0.008608417585492134, + "loss_iou": 1.2109375, + "loss_num": 0.1328125, + "loss_xval": 3.09375, + "num_input_tokens_seen": 7689676, + "step": 116 + }, + { + "epoch": 0.010951467215800065, + "grad_norm": 9.5101957321167, + "learning_rate": 5e-05, + "loss": 2.5344, + "num_input_tokens_seen": 7755928, + "step": 117 + }, + { + "epoch": 0.010951467215800065, + "loss": 2.370011806488037, + "loss_ce": 0.0052659399807453156, + "loss_iou": 0.92578125, + "loss_num": 0.10302734375, + "loss_xval": 2.359375, + "num_input_tokens_seen": 7755928, + "step": 117 + }, + { + "epoch": 0.011045069499695793, + "grad_norm": 10.531336784362793, + "learning_rate": 5e-05, + "loss": 2.4916, + "num_input_tokens_seen": 7821848, + "step": 118 + }, + { + "epoch": 0.011045069499695793, + "loss": 2.4239916801452637, + "loss_ce": 0.0074876840226352215, + "loss_iou": 0.96875, + "loss_num": 0.09619140625, + "loss_xval": 2.421875, + "num_input_tokens_seen": 7821848, + "step": 118 + }, + { + "epoch": 0.011138671783591519, + "grad_norm": 12.39312744140625, + "learning_rate": 5e-05, + "loss": 2.2888, + "num_input_tokens_seen": 7887764, + "step": 119 + }, + { + "epoch": 0.011138671783591519, + "loss": 2.2058448791503906, + "loss_ce": 0.004673070274293423, + "loss_iou": 0.890625, + "loss_num": 0.083984375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 7887764, + "step": 119 + }, + { + "epoch": 0.011232274067487247, + "grad_norm": 15.486209869384766, + "learning_rate": 5e-05, + "loss": 2.3216, + "num_input_tokens_seen": 7953732, + "step": 120 + }, + { + "epoch": 0.011232274067487247, + "loss": 2.1480751037597656, + "loss_ce": 0.009403292089700699, + "loss_iou": 0.88671875, + "loss_num": 0.07275390625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 7953732, + "step": 120 + }, + { + "epoch": 0.011325876351382973, + "grad_norm": 12.126895904541016, + "learning_rate": 5e-05, + "loss": 2.3391, + "num_input_tokens_seen": 8019224, + "step": 121 + }, + { + "epoch": 0.011325876351382973, + "loss": 2.291839838027954, + "loss_ce": 0.006683724001049995, + "loss_iou": 0.97265625, + "loss_num": 0.068359375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 8019224, + "step": 121 + }, + { + "epoch": 0.011419478635278701, + "grad_norm": 40.47333908081055, + "learning_rate": 5e-05, + "loss": 2.738, + "num_input_tokens_seen": 8086356, + "step": 122 + }, + { + "epoch": 0.011419478635278701, + "loss": 2.6039280891418457, + "loss_ce": 0.0033420585095882416, + "loss_iou": 1.171875, + "loss_num": 0.052001953125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 8086356, + "step": 122 + }, + { + "epoch": 0.011513080919174428, + "grad_norm": 15.583828926086426, + "learning_rate": 5e-05, + "loss": 3.0616, + "num_input_tokens_seen": 8153056, + "step": 123 + }, + { + "epoch": 0.011513080919174428, + "loss": 2.9486942291259766, + "loss_ce": 0.01314749475568533, + "loss_iou": 1.0234375, + "loss_num": 0.177734375, + "loss_xval": 2.9375, + "num_input_tokens_seen": 8153056, + "step": 123 + }, + { + "epoch": 0.011606683203070156, + "grad_norm": 10.330870628356934, + "learning_rate": 5e-05, + "loss": 2.8413, + "num_input_tokens_seen": 8218476, + "step": 124 + }, + { + "epoch": 0.011606683203070156, + "loss": 2.6871485710144043, + "loss_ce": 0.013320403173565865, + "loss_iou": 0.9765625, + "loss_num": 0.14453125, + "loss_xval": 2.671875, + "num_input_tokens_seen": 8218476, + "step": 124 + }, + { + "epoch": 0.011700285486965882, + "grad_norm": 9.994678497314453, + "learning_rate": 5e-05, + "loss": 2.7807, + "num_input_tokens_seen": 8285120, + "step": 125 + }, + { + "epoch": 0.011700285486965882, + "loss": 2.8563404083251953, + "loss_ce": 0.006731036584824324, + "loss_iou": 1.0546875, + "loss_num": 0.150390625, + "loss_xval": 2.84375, + "num_input_tokens_seen": 8285120, + "step": 125 + }, + { + "epoch": 0.011793887770861608, + "grad_norm": 7.227442264556885, + "learning_rate": 5e-05, + "loss": 2.472, + "num_input_tokens_seen": 8351572, + "step": 126 + }, + { + "epoch": 0.011793887770861608, + "loss": 2.2409982681274414, + "loss_ce": 0.0075998734682798386, + "loss_iou": 0.83984375, + "loss_num": 0.1103515625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 8351572, + "step": 126 + }, + { + "epoch": 0.011887490054757336, + "grad_norm": 7.387115478515625, + "learning_rate": 5e-05, + "loss": 2.6037, + "num_input_tokens_seen": 8419100, + "step": 127 + }, + { + "epoch": 0.011887490054757336, + "loss": 2.6546459197998047, + "loss_ce": 0.004255238920450211, + "loss_iou": 1.0, + "loss_num": 0.130859375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 8419100, + "step": 127 + }, + { + "epoch": 0.011981092338653063, + "grad_norm": 6.874162197113037, + "learning_rate": 5e-05, + "loss": 2.4978, + "num_input_tokens_seen": 8486096, + "step": 128 + }, + { + "epoch": 0.011981092338653063, + "loss": 2.5121519565582275, + "loss_ce": 0.008245638571679592, + "loss_iou": 0.9921875, + "loss_num": 0.103515625, + "loss_xval": 2.5, + "num_input_tokens_seen": 8486096, + "step": 128 + }, + { + "epoch": 0.01207469462254879, + "grad_norm": 7.546186923980713, + "learning_rate": 5e-05, + "loss": 2.4236, + "num_input_tokens_seen": 8552400, + "step": 129 + }, + { + "epoch": 0.01207469462254879, + "loss": 2.4138965606689453, + "loss_ce": 0.0017871541203930974, + "loss_iou": 0.98046875, + "loss_num": 0.0908203125, + "loss_xval": 2.40625, + "num_input_tokens_seen": 8552400, + "step": 129 + }, + { + "epoch": 0.012168296906444517, + "grad_norm": 13.933201789855957, + "learning_rate": 5e-05, + "loss": 2.4146, + "num_input_tokens_seen": 8617660, + "step": 130 + }, + { + "epoch": 0.012168296906444517, + "loss": 2.359180450439453, + "loss_ce": 0.006641239859163761, + "loss_iou": 0.9765625, + "loss_num": 0.080078125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 8617660, + "step": 130 + }, + { + "epoch": 0.012261899190340245, + "grad_norm": 13.758872985839844, + "learning_rate": 5e-05, + "loss": 2.2476, + "num_input_tokens_seen": 8684492, + "step": 131 + }, + { + "epoch": 0.012261899190340245, + "loss": 2.2961173057556152, + "loss_ce": 0.006566723342984915, + "loss_iou": 0.921875, + "loss_num": 0.08935546875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 8684492, + "step": 131 + }, + { + "epoch": 0.012355501474235971, + "grad_norm": 34.28789138793945, + "learning_rate": 5e-05, + "loss": 2.4416, + "num_input_tokens_seen": 8750736, + "step": 132 + }, + { + "epoch": 0.012355501474235971, + "loss": 2.599449634552002, + "loss_ce": 0.007652592845261097, + "loss_iou": 1.0859375, + "loss_num": 0.083984375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 8750736, + "step": 132 + }, + { + "epoch": 0.012449103758131698, + "grad_norm": 16.799196243286133, + "learning_rate": 5e-05, + "loss": 2.6894, + "num_input_tokens_seen": 8817252, + "step": 133 + }, + { + "epoch": 0.012449103758131698, + "loss": 2.734215259552002, + "loss_ce": 0.0037463903427124023, + "loss_iou": 1.09375, + "loss_num": 0.10888671875, + "loss_xval": 2.734375, + "num_input_tokens_seen": 8817252, + "step": 133 + }, + { + "epoch": 0.012542706042027426, + "grad_norm": 17.619638442993164, + "learning_rate": 5e-05, + "loss": 2.0357, + "num_input_tokens_seen": 8883412, + "step": 134 + }, + { + "epoch": 0.012542706042027426, + "loss": 1.9238464832305908, + "loss_ce": 0.002459696726873517, + "loss_iou": 0.8046875, + "loss_num": 0.0625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 8883412, + "step": 134 + }, + { + "epoch": 0.012636308325923152, + "grad_norm": 20.350069046020508, + "learning_rate": 5e-05, + "loss": 2.3698, + "num_input_tokens_seen": 8950140, + "step": 135 + }, + { + "epoch": 0.012636308325923152, + "loss": 2.399346351623535, + "loss_ce": 0.005791927687823772, + "loss_iou": 1.0234375, + "loss_num": 0.07080078125, + "loss_xval": 2.390625, + "num_input_tokens_seen": 8950140, + "step": 135 + }, + { + "epoch": 0.01272991060981888, + "grad_norm": 14.986017227172852, + "learning_rate": 5e-05, + "loss": 2.6098, + "num_input_tokens_seen": 9017656, + "step": 136 + }, + { + "epoch": 0.01272991060981888, + "loss": 2.6524224281311035, + "loss_ce": 0.005938143003731966, + "loss_iou": 1.078125, + "loss_num": 0.099609375, + "loss_xval": 2.640625, + "num_input_tokens_seen": 9017656, + "step": 136 + }, + { + "epoch": 0.012823512893714606, + "grad_norm": 17.91053581237793, + "learning_rate": 5e-05, + "loss": 2.3665, + "num_input_tokens_seen": 9083428, + "step": 137 + }, + { + "epoch": 0.012823512893714606, + "loss": 2.3613765239715576, + "loss_ce": 0.00395461730659008, + "loss_iou": 0.98828125, + "loss_num": 0.0771484375, + "loss_xval": 2.359375, + "num_input_tokens_seen": 9083428, + "step": 137 + }, + { + "epoch": 0.012917115177610334, + "grad_norm": 17.348831176757812, + "learning_rate": 5e-05, + "loss": 2.3659, + "num_input_tokens_seen": 9150248, + "step": 138 + }, + { + "epoch": 0.012917115177610334, + "loss": 2.268336772918701, + "loss_ce": 0.008571156300604343, + "loss_iou": 0.96875, + "loss_num": 0.06494140625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 9150248, + "step": 138 + }, + { + "epoch": 0.01301071746150606, + "grad_norm": 16.304168701171875, + "learning_rate": 5e-05, + "loss": 2.7852, + "num_input_tokens_seen": 9216832, + "step": 139 + }, + { + "epoch": 0.01301071746150606, + "loss": 2.989776849746704, + "loss_ce": 0.005401725880801678, + "loss_iou": 1.234375, + "loss_num": 0.1025390625, + "loss_xval": 2.984375, + "num_input_tokens_seen": 9216832, + "step": 139 + }, + { + "epoch": 0.013104319745401789, + "grad_norm": 6.476009368896484, + "learning_rate": 5e-05, + "loss": 2.5077, + "num_input_tokens_seen": 9282932, + "step": 140 + }, + { + "epoch": 0.013104319745401789, + "loss": 2.4586074352264404, + "loss_ce": 0.007435602601617575, + "loss_iou": 0.9921875, + "loss_num": 0.09326171875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 9282932, + "step": 140 + }, + { + "epoch": 0.013197922029297515, + "grad_norm": 20.276348114013672, + "learning_rate": 5e-05, + "loss": 2.2915, + "num_input_tokens_seen": 9349952, + "step": 141 + }, + { + "epoch": 0.013197922029297515, + "loss": 2.3907690048217773, + "loss_ce": 0.006003286689519882, + "loss_iou": 0.984375, + "loss_num": 0.08349609375, + "loss_xval": 2.390625, + "num_input_tokens_seen": 9349952, + "step": 141 + }, + { + "epoch": 0.013291524313193241, + "grad_norm": 19.617143630981445, + "learning_rate": 5e-05, + "loss": 2.3906, + "num_input_tokens_seen": 9415312, + "step": 142 + }, + { + "epoch": 0.013291524313193241, + "loss": 2.362905740737915, + "loss_ce": 0.0054837800562381744, + "loss_iou": 0.98046875, + "loss_num": 0.0791015625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 9415312, + "step": 142 + }, + { + "epoch": 0.01338512659708897, + "grad_norm": 27.583656311035156, + "learning_rate": 5e-05, + "loss": 2.3203, + "num_input_tokens_seen": 9482276, + "step": 143 + }, + { + "epoch": 0.01338512659708897, + "loss": 2.3812010288238525, + "loss_ce": 0.006201109383255243, + "loss_iou": 1.0234375, + "loss_num": 0.06591796875, + "loss_xval": 2.375, + "num_input_tokens_seen": 9482276, + "step": 143 + }, + { + "epoch": 0.013478728880984696, + "grad_norm": 11.42666244506836, + "learning_rate": 5e-05, + "loss": 2.9603, + "num_input_tokens_seen": 9548436, + "step": 144 + }, + { + "epoch": 0.013478728880984696, + "loss": 3.061565637588501, + "loss_ce": 0.010784330777823925, + "loss_iou": 1.1875, + "loss_num": 0.1337890625, + "loss_xval": 3.046875, + "num_input_tokens_seen": 9548436, + "step": 144 + }, + { + "epoch": 0.013572331164880424, + "grad_norm": 13.747142791748047, + "learning_rate": 5e-05, + "loss": 2.7446, + "num_input_tokens_seen": 9615716, + "step": 145 + }, + { + "epoch": 0.013572331164880424, + "loss": 2.8178625106811523, + "loss_ce": 0.007315758150070906, + "loss_iou": 1.109375, + "loss_num": 0.11962890625, + "loss_xval": 2.8125, + "num_input_tokens_seen": 9615716, + "step": 145 + }, + { + "epoch": 0.01366593344877615, + "grad_norm": 8.339811325073242, + "learning_rate": 5e-05, + "loss": 2.1399, + "num_input_tokens_seen": 9681096, + "step": 146 + }, + { + "epoch": 0.01366593344877615, + "loss": 2.1111319065093994, + "loss_ce": 0.008592926897108555, + "loss_iou": 0.859375, + "loss_num": 0.076171875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 9681096, + "step": 146 + }, + { + "epoch": 0.013759535732671878, + "grad_norm": 14.685997009277344, + "learning_rate": 5e-05, + "loss": 2.4779, + "num_input_tokens_seen": 9747872, + "step": 147 + }, + { + "epoch": 0.013759535732671878, + "loss": 2.411579132080078, + "loss_ce": 0.006305626593530178, + "loss_iou": 0.98828125, + "loss_num": 0.0869140625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 9747872, + "step": 147 + }, + { + "epoch": 0.013853138016567604, + "grad_norm": 10.855498313903809, + "learning_rate": 5e-05, + "loss": 1.8449, + "num_input_tokens_seen": 9813032, + "step": 148 + }, + { + "epoch": 0.013853138016567604, + "loss": 1.9836018085479736, + "loss_ce": 0.004109744913876057, + "loss_iou": 0.83203125, + "loss_num": 0.0634765625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 9813032, + "step": 148 + }, + { + "epoch": 0.013946740300463332, + "grad_norm": 39.248653411865234, + "learning_rate": 5e-05, + "loss": 2.6402, + "num_input_tokens_seen": 9878828, + "step": 149 + }, + { + "epoch": 0.013946740300463332, + "loss": 2.7219836711883545, + "loss_ce": 0.006163286045193672, + "loss_iou": 1.171875, + "loss_num": 0.07421875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 9878828, + "step": 149 + }, + { + "epoch": 0.014040342584359058, + "grad_norm": 10.349116325378418, + "learning_rate": 5e-05, + "loss": 2.4491, + "num_input_tokens_seen": 9944760, + "step": 150 + }, + { + "epoch": 0.014040342584359058, + "loss": 2.5953550338745117, + "loss_ce": 0.006976166274398565, + "loss_iou": 0.9765625, + "loss_num": 0.126953125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 9944760, + "step": 150 + }, + { + "epoch": 0.014133944868254785, + "grad_norm": 16.034276962280273, + "learning_rate": 5e-05, + "loss": 2.8513, + "num_input_tokens_seen": 10011116, + "step": 151 + }, + { + "epoch": 0.014133944868254785, + "loss": 3.006396770477295, + "loss_ce": 0.010302877053618431, + "loss_iou": 1.140625, + "loss_num": 0.142578125, + "loss_xval": 3.0, + "num_input_tokens_seen": 10011116, + "step": 151 + }, + { + "epoch": 0.014227547152150513, + "grad_norm": 10.876625061035156, + "learning_rate": 5e-05, + "loss": 2.661, + "num_input_tokens_seen": 10077456, + "step": 152 + }, + { + "epoch": 0.014227547152150513, + "loss": 2.6819534301757812, + "loss_ce": 0.006172118242830038, + "loss_iou": 1.0625, + "loss_num": 0.1103515625, + "loss_xval": 2.671875, + "num_input_tokens_seen": 10077456, + "step": 152 + }, + { + "epoch": 0.014321149436046239, + "grad_norm": 12.449788093566895, + "learning_rate": 5e-05, + "loss": 2.538, + "num_input_tokens_seen": 10142392, + "step": 153 + }, + { + "epoch": 0.014321149436046239, + "loss": 2.5689287185668945, + "loss_ce": 0.008381940424442291, + "loss_iou": 1.03125, + "loss_num": 0.09814453125, + "loss_xval": 2.5625, + "num_input_tokens_seen": 10142392, + "step": 153 + }, + { + "epoch": 0.014414751719941967, + "grad_norm": 8.446009635925293, + "learning_rate": 5e-05, + "loss": 2.6359, + "num_input_tokens_seen": 10208352, + "step": 154 + }, + { + "epoch": 0.014414751719941967, + "loss": 2.5929999351501465, + "loss_ce": 0.0031562570948153734, + "loss_iou": 1.0390625, + "loss_num": 0.1025390625, + "loss_xval": 2.59375, + "num_input_tokens_seen": 10208352, + "step": 154 + }, + { + "epoch": 0.014508354003837693, + "grad_norm": 10.110790252685547, + "learning_rate": 5e-05, + "loss": 2.3568, + "num_input_tokens_seen": 10274700, + "step": 155 + }, + { + "epoch": 0.014508354003837693, + "loss": 2.2932240962982178, + "loss_ce": 0.0041616931557655334, + "loss_iou": 0.9453125, + "loss_num": 0.07958984375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 10274700, + "step": 155 + }, + { + "epoch": 0.014601956287733421, + "grad_norm": 9.76326847076416, + "learning_rate": 5e-05, + "loss": 2.4006, + "num_input_tokens_seen": 10340968, + "step": 156 + }, + { + "epoch": 0.014601956287733421, + "loss": 2.31691837310791, + "loss_ce": 0.002465251600369811, + "loss_iou": 0.98828125, + "loss_num": 0.0673828125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 10340968, + "step": 156 + }, + { + "epoch": 0.014695558571629148, + "grad_norm": 9.665358543395996, + "learning_rate": 5e-05, + "loss": 2.3008, + "num_input_tokens_seen": 10407364, + "step": 157 + }, + { + "epoch": 0.014695558571629148, + "loss": 2.257051944732666, + "loss_ce": 0.003145768539980054, + "loss_iou": 0.94921875, + "loss_num": 0.0712890625, + "loss_xval": 2.25, + "num_input_tokens_seen": 10407364, + "step": 157 + }, + { + "epoch": 0.014789160855524874, + "grad_norm": 21.795377731323242, + "learning_rate": 5e-05, + "loss": 2.4966, + "num_input_tokens_seen": 10473436, + "step": 158 + }, + { + "epoch": 0.014789160855524874, + "loss": 2.6553800106048584, + "loss_ce": 0.003036240115761757, + "loss_iou": 1.1875, + "loss_num": 0.055908203125, + "loss_xval": 2.65625, + "num_input_tokens_seen": 10473436, + "step": 158 + }, + { + "epoch": 0.014882763139420602, + "grad_norm": 18.09992027282715, + "learning_rate": 5e-05, + "loss": 2.7492, + "num_input_tokens_seen": 10539464, + "step": 159 + }, + { + "epoch": 0.014882763139420602, + "loss": 2.6252808570861816, + "loss_ce": 0.0051638283766806126, + "loss_iou": 0.98046875, + "loss_num": 0.1328125, + "loss_xval": 2.625, + "num_input_tokens_seen": 10539464, + "step": 159 + }, + { + "epoch": 0.014976365423316328, + "grad_norm": 7.850207328796387, + "learning_rate": 5e-05, + "loss": 2.5368, + "num_input_tokens_seen": 10606284, + "step": 160 + }, + { + "epoch": 0.014976365423316328, + "loss": 2.542900800704956, + "loss_ce": 0.007744714617729187, + "loss_iou": 1.0, + "loss_num": 0.107421875, + "loss_xval": 2.53125, + "num_input_tokens_seen": 10606284, + "step": 160 + }, + { + "epoch": 0.015069967707212056, + "grad_norm": 23.272611618041992, + "learning_rate": 5e-05, + "loss": 2.3709, + "num_input_tokens_seen": 10671240, + "step": 161 + }, + { + "epoch": 0.015069967707212056, + "loss": 2.5069596767425537, + "loss_ce": 0.006959730759263039, + "loss_iou": 0.9921875, + "loss_num": 0.10400390625, + "loss_xval": 2.5, + "num_input_tokens_seen": 10671240, + "step": 161 + }, + { + "epoch": 0.015163569991107783, + "grad_norm": 9.053507804870605, + "learning_rate": 5e-05, + "loss": 2.3128, + "num_input_tokens_seen": 10737284, + "step": 162 + }, + { + "epoch": 0.015163569991107783, + "loss": 2.2194528579711914, + "loss_ce": 0.007905061356723309, + "loss_iou": 0.89453125, + "loss_num": 0.08447265625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 10737284, + "step": 162 + }, + { + "epoch": 0.01525717227500351, + "grad_norm": 10.071734428405762, + "learning_rate": 5e-05, + "loss": 2.2584, + "num_input_tokens_seen": 10802660, + "step": 163 + }, + { + "epoch": 0.01525717227500351, + "loss": 2.2959604263305664, + "loss_ce": 0.008850975893437862, + "loss_iou": 0.96875, + "loss_num": 0.0693359375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 10802660, + "step": 163 + }, + { + "epoch": 0.015350774558899237, + "grad_norm": 28.983577728271484, + "learning_rate": 5e-05, + "loss": 2.4874, + "num_input_tokens_seen": 10867884, + "step": 164 + }, + { + "epoch": 0.015350774558899237, + "loss": 2.6018126010894775, + "loss_ce": 0.006109514273703098, + "loss_iou": 1.125, + "loss_num": 0.068359375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 10867884, + "step": 164 + }, + { + "epoch": 0.015444376842794965, + "grad_norm": 13.433588981628418, + "learning_rate": 5e-05, + "loss": 2.6725, + "num_input_tokens_seen": 10934420, + "step": 165 + }, + { + "epoch": 0.015444376842794965, + "loss": 2.5403361320495605, + "loss_ce": 0.008109445683658123, + "loss_iou": 0.98046875, + "loss_num": 0.11376953125, + "loss_xval": 2.53125, + "num_input_tokens_seen": 10934420, + "step": 165 + }, + { + "epoch": 0.015537979126690691, + "grad_norm": 7.735687732696533, + "learning_rate": 5e-05, + "loss": 2.409, + "num_input_tokens_seen": 10999412, + "step": 166 + }, + { + "epoch": 0.015537979126690691, + "loss": 2.1762490272521973, + "loss_ce": 0.008280330337584019, + "loss_iou": 0.84375, + "loss_num": 0.095703125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 10999412, + "step": 166 + }, + { + "epoch": 0.015631581410586418, + "grad_norm": 7.087269306182861, + "learning_rate": 5e-05, + "loss": 2.2915, + "num_input_tokens_seen": 11065368, + "step": 167 + }, + { + "epoch": 0.015631581410586418, + "loss": 2.211134433746338, + "loss_ce": 0.0038590305484831333, + "loss_iou": 0.8984375, + "loss_num": 0.08154296875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 11065368, + "step": 167 + }, + { + "epoch": 0.015725183694482146, + "grad_norm": 6.687474727630615, + "learning_rate": 5e-05, + "loss": 2.0499, + "num_input_tokens_seen": 11131428, + "step": 168 + }, + { + "epoch": 0.015725183694482146, + "loss": 1.9903085231781006, + "loss_ce": 0.003980449866503477, + "loss_iou": 0.8046875, + "loss_num": 0.07568359375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 11131428, + "step": 168 + }, + { + "epoch": 0.015818785978377874, + "grad_norm": 24.095243453979492, + "learning_rate": 5e-05, + "loss": 2.3104, + "num_input_tokens_seen": 11197824, + "step": 169 + }, + { + "epoch": 0.015818785978377874, + "loss": 2.221169948577881, + "loss_ce": 0.004373104777187109, + "loss_iou": 0.98046875, + "loss_num": 0.05126953125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 11197824, + "step": 169 + }, + { + "epoch": 0.015912388262273598, + "grad_norm": 19.663116455078125, + "learning_rate": 5e-05, + "loss": 2.2558, + "num_input_tokens_seen": 11263620, + "step": 170 + }, + { + "epoch": 0.015912388262273598, + "loss": 2.362255096435547, + "loss_ce": 0.0038566177245229483, + "loss_iou": 1.0078125, + "loss_num": 0.0673828125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 11263620, + "step": 170 + }, + { + "epoch": 0.016005990546169326, + "grad_norm": 17.370332717895508, + "learning_rate": 5e-05, + "loss": 2.2358, + "num_input_tokens_seen": 11329808, + "step": 171 + }, + { + "epoch": 0.016005990546169326, + "loss": 2.3029870986938477, + "loss_ce": 0.0022056836169213057, + "loss_iou": 1.0078125, + "loss_num": 0.05810546875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 11329808, + "step": 171 + }, + { + "epoch": 0.016099592830065054, + "grad_norm": 10.88261890411377, + "learning_rate": 5e-05, + "loss": 2.6163, + "num_input_tokens_seen": 11395504, + "step": 172 + }, + { + "epoch": 0.016099592830065054, + "loss": 2.5592880249023438, + "loss_ce": 0.006553507875651121, + "loss_iou": 1.0546875, + "loss_num": 0.08935546875, + "loss_xval": 2.546875, + "num_input_tokens_seen": 11395504, + "step": 172 + }, + { + "epoch": 0.016193195113960782, + "grad_norm": 9.813773155212402, + "learning_rate": 5e-05, + "loss": 2.2342, + "num_input_tokens_seen": 11461240, + "step": 173 + }, + { + "epoch": 0.016193195113960782, + "loss": 2.2554492950439453, + "loss_ce": 0.005449455231428146, + "loss_iou": 0.9609375, + "loss_num": 0.06640625, + "loss_xval": 2.25, + "num_input_tokens_seen": 11461240, + "step": 173 + }, + { + "epoch": 0.016286797397856507, + "grad_norm": 30.811744689941406, + "learning_rate": 5e-05, + "loss": 2.3332, + "num_input_tokens_seen": 11527512, + "step": 174 + }, + { + "epoch": 0.016286797397856507, + "loss": 2.167771816253662, + "loss_ce": 0.004686021711677313, + "loss_iou": 0.890625, + "loss_num": 0.07568359375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 11527512, + "step": 174 + }, + { + "epoch": 0.016380399681752235, + "grad_norm": 10.913748741149902, + "learning_rate": 5e-05, + "loss": 2.121, + "num_input_tokens_seen": 11592812, + "step": 175 + }, + { + "epoch": 0.016380399681752235, + "loss": 1.9444025754928589, + "loss_ce": 0.0037287739105522633, + "loss_iou": 0.7578125, + "loss_num": 0.08544921875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 11592812, + "step": 175 + }, + { + "epoch": 0.016474001965647963, + "grad_norm": 8.592153549194336, + "learning_rate": 5e-05, + "loss": 2.4228, + "num_input_tokens_seen": 11659136, + "step": 176 + }, + { + "epoch": 0.016474001965647963, + "loss": 2.4883460998535156, + "loss_ce": 0.00397090008482337, + "loss_iou": 1.015625, + "loss_num": 0.08984375, + "loss_xval": 2.484375, + "num_input_tokens_seen": 11659136, + "step": 176 + }, + { + "epoch": 0.016567604249543687, + "grad_norm": 12.420134544372559, + "learning_rate": 5e-05, + "loss": 2.1218, + "num_input_tokens_seen": 11724716, + "step": 177 + }, + { + "epoch": 0.016567604249543687, + "loss": 2.189643144607544, + "loss_ce": 0.0021431802306324244, + "loss_iou": 0.953125, + "loss_num": 0.055419921875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 11724716, + "step": 177 + }, + { + "epoch": 0.016661206533439415, + "grad_norm": 16.79232406616211, + "learning_rate": 5e-05, + "loss": 2.1733, + "num_input_tokens_seen": 11790176, + "step": 178 + }, + { + "epoch": 0.016661206533439415, + "loss": 2.0610435009002686, + "loss_ce": 0.0035484700929373503, + "loss_iou": 0.89453125, + "loss_num": 0.053955078125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 11790176, + "step": 178 + }, + { + "epoch": 0.016754808817335144, + "grad_norm": 13.537357330322266, + "learning_rate": 5e-05, + "loss": 2.1361, + "num_input_tokens_seen": 11855320, + "step": 179 + }, + { + "epoch": 0.016754808817335144, + "loss": 2.492384910583496, + "loss_ce": 0.0060567706823349, + "loss_iou": 1.0234375, + "loss_num": 0.0869140625, + "loss_xval": 2.484375, + "num_input_tokens_seen": 11855320, + "step": 179 + }, + { + "epoch": 0.01684841110123087, + "grad_norm": 15.200262069702148, + "learning_rate": 5e-05, + "loss": 2.504, + "num_input_tokens_seen": 11921980, + "step": 180 + }, + { + "epoch": 0.01684841110123087, + "loss": 2.3923120498657227, + "loss_ce": 0.00364036881364882, + "loss_iou": 0.99609375, + "loss_num": 0.080078125, + "loss_xval": 2.390625, + "num_input_tokens_seen": 11921980, + "step": 180 + }, + { + "epoch": 0.016942013385126596, + "grad_norm": 12.651123046875, + "learning_rate": 5e-05, + "loss": 2.11, + "num_input_tokens_seen": 11988024, + "step": 181 + }, + { + "epoch": 0.016942013385126596, + "loss": 1.9797072410583496, + "loss_ce": 0.008760059252381325, + "loss_iou": 0.828125, + "loss_num": 0.0615234375, + "loss_xval": 1.96875, + "num_input_tokens_seen": 11988024, + "step": 181 + }, + { + "epoch": 0.017035615669022324, + "grad_norm": 13.741872787475586, + "learning_rate": 5e-05, + "loss": 2.6191, + "num_input_tokens_seen": 12054344, + "step": 182 + }, + { + "epoch": 0.017035615669022324, + "loss": 2.6435494422912598, + "loss_ce": 0.004877523984760046, + "loss_iou": 1.140625, + "loss_num": 0.072265625, + "loss_xval": 2.640625, + "num_input_tokens_seen": 12054344, + "step": 182 + }, + { + "epoch": 0.017129217952918052, + "grad_norm": 16.598339080810547, + "learning_rate": 5e-05, + "loss": 2.4599, + "num_input_tokens_seen": 12119892, + "step": 183 + }, + { + "epoch": 0.017129217952918052, + "loss": 2.4329566955566406, + "loss_ce": 0.003269173437729478, + "loss_iou": 1.0234375, + "loss_num": 0.076171875, + "loss_xval": 2.4375, + "num_input_tokens_seen": 12119892, + "step": 183 + }, + { + "epoch": 0.017222820236813777, + "grad_norm": 12.140115737915039, + "learning_rate": 5e-05, + "loss": 2.392, + "num_input_tokens_seen": 12185780, + "step": 184 + }, + { + "epoch": 0.017222820236813777, + "loss": 2.060917377471924, + "loss_ce": 0.00354421092197299, + "loss_iou": 0.859375, + "loss_num": 0.0673828125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 12185780, + "step": 184 + }, + { + "epoch": 0.017316422520709505, + "grad_norm": 21.703996658325195, + "learning_rate": 5e-05, + "loss": 2.3705, + "num_input_tokens_seen": 12251676, + "step": 185 + }, + { + "epoch": 0.017316422520709505, + "loss": 2.2541608810424805, + "loss_ce": 0.00611409917473793, + "loss_iou": 0.9765625, + "loss_num": 0.059814453125, + "loss_xval": 2.25, + "num_input_tokens_seen": 12251676, + "step": 185 + }, + { + "epoch": 0.017410024804605233, + "grad_norm": 14.7355375289917, + "learning_rate": 5e-05, + "loss": 2.3044, + "num_input_tokens_seen": 12318336, + "step": 186 + }, + { + "epoch": 0.017410024804605233, + "loss": 2.4516263008117676, + "loss_ce": 0.0043604495003819466, + "loss_iou": 1.0390625, + "loss_num": 0.07373046875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 12318336, + "step": 186 + }, + { + "epoch": 0.01750362708850096, + "grad_norm": 8.447161674499512, + "learning_rate": 5e-05, + "loss": 2.2574, + "num_input_tokens_seen": 12384792, + "step": 187 + }, + { + "epoch": 0.01750362708850096, + "loss": 2.2445409297943115, + "loss_ce": 0.0033300297800451517, + "loss_iou": 0.96875, + "loss_num": 0.060546875, + "loss_xval": 2.234375, + "num_input_tokens_seen": 12384792, + "step": 187 + }, + { + "epoch": 0.017597229372396685, + "grad_norm": 10.398317337036133, + "learning_rate": 5e-05, + "loss": 2.1611, + "num_input_tokens_seen": 12450828, + "step": 188 + }, + { + "epoch": 0.017597229372396685, + "loss": 2.133500814437866, + "loss_ce": 0.006547821685671806, + "loss_iou": 0.921875, + "loss_num": 0.056640625, + "loss_xval": 2.125, + "num_input_tokens_seen": 12450828, + "step": 188 + }, + { + "epoch": 0.017690831656292413, + "grad_norm": 13.35843563079834, + "learning_rate": 5e-05, + "loss": 2.3758, + "num_input_tokens_seen": 12518072, + "step": 189 + }, + { + "epoch": 0.017690831656292413, + "loss": 2.461061954498291, + "loss_ce": 0.011843172833323479, + "loss_iou": 1.0234375, + "loss_num": 0.0810546875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 12518072, + "step": 189 + }, + { + "epoch": 0.01778443394018814, + "grad_norm": 28.607030868530273, + "learning_rate": 5e-05, + "loss": 2.4175, + "num_input_tokens_seen": 12585416, + "step": 190 + }, + { + "epoch": 0.01778443394018814, + "loss": 2.3904967308044434, + "loss_ce": 0.005731300450861454, + "loss_iou": 1.0625, + "loss_num": 0.05322265625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 12585416, + "step": 190 + }, + { + "epoch": 0.017878036224083866, + "grad_norm": 7.866536617279053, + "learning_rate": 5e-05, + "loss": 2.7323, + "num_input_tokens_seen": 12652420, + "step": 191 + }, + { + "epoch": 0.017878036224083866, + "loss": 2.5774574279785156, + "loss_ce": 0.006168528459966183, + "loss_iou": 0.97265625, + "loss_num": 0.12451171875, + "loss_xval": 2.578125, + "num_input_tokens_seen": 12652420, + "step": 191 + }, + { + "epoch": 0.017971638507979594, + "grad_norm": 12.57685375213623, + "learning_rate": 5e-05, + "loss": 2.746, + "num_input_tokens_seen": 12718744, + "step": 192 + }, + { + "epoch": 0.017971638507979594, + "loss": 2.8357036113739014, + "loss_ce": 0.0036723511293530464, + "loss_iou": 1.0859375, + "loss_num": 0.1328125, + "loss_xval": 2.828125, + "num_input_tokens_seen": 12718744, + "step": 192 + }, + { + "epoch": 0.018065240791875322, + "grad_norm": 7.3611531257629395, + "learning_rate": 5e-05, + "loss": 2.5383, + "num_input_tokens_seen": 12785104, + "step": 193 + }, + { + "epoch": 0.018065240791875322, + "loss": 2.5341057777404785, + "loss_ce": 0.005785365588963032, + "loss_iou": 0.98828125, + "loss_num": 0.109375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 12785104, + "step": 193 + }, + { + "epoch": 0.01815884307577105, + "grad_norm": 7.051367282867432, + "learning_rate": 5e-05, + "loss": 2.3916, + "num_input_tokens_seen": 12850648, + "step": 194 + }, + { + "epoch": 0.01815884307577105, + "loss": 2.2845864295959473, + "loss_ce": 0.005289818160235882, + "loss_iou": 0.9140625, + "loss_num": 0.09033203125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 12850648, + "step": 194 + }, + { + "epoch": 0.018252445359666775, + "grad_norm": 7.7706708908081055, + "learning_rate": 5e-05, + "loss": 2.3577, + "num_input_tokens_seen": 12917360, + "step": 195 + }, + { + "epoch": 0.018252445359666775, + "loss": 2.3785312175750732, + "loss_ce": 0.00548445014283061, + "loss_iou": 0.9921875, + "loss_num": 0.078125, + "loss_xval": 2.375, + "num_input_tokens_seen": 12917360, + "step": 195 + }, + { + "epoch": 0.018346047643562503, + "grad_norm": 4.753993511199951, + "learning_rate": 5e-05, + "loss": 2.375, + "num_input_tokens_seen": 12983124, + "step": 196 + }, + { + "epoch": 0.018346047643562503, + "loss": 2.251105546951294, + "loss_ce": 0.004035319201648235, + "loss_iou": 0.9375, + "loss_num": 0.0751953125, + "loss_xval": 2.25, + "num_input_tokens_seen": 12983124, + "step": 196 + }, + { + "epoch": 0.01843964992745823, + "grad_norm": 17.095426559448242, + "learning_rate": 5e-05, + "loss": 2.101, + "num_input_tokens_seen": 13049128, + "step": 197 + }, + { + "epoch": 0.01843964992745823, + "loss": 1.9639153480529785, + "loss_ce": 0.0034661320969462395, + "loss_iou": 0.8125, + "loss_num": 0.06689453125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 13049128, + "step": 197 + }, + { + "epoch": 0.01853325221135396, + "grad_norm": 10.60446548461914, + "learning_rate": 5e-05, + "loss": 2.1544, + "num_input_tokens_seen": 13115624, + "step": 198 + }, + { + "epoch": 0.01853325221135396, + "loss": 2.3536925315856934, + "loss_ce": 0.0021300031803548336, + "loss_iou": 1.0078125, + "loss_num": 0.06640625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 13115624, + "step": 198 + }, + { + "epoch": 0.018626854495249683, + "grad_norm": 8.845651626586914, + "learning_rate": 5e-05, + "loss": 1.9546, + "num_input_tokens_seen": 13181448, + "step": 199 + }, + { + "epoch": 0.018626854495249683, + "loss": 2.158360481262207, + "loss_ce": 0.002110264729708433, + "loss_iou": 0.93359375, + "loss_num": 0.057373046875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 13181448, + "step": 199 + }, + { + "epoch": 0.01872045677914541, + "grad_norm": 11.761173248291016, + "learning_rate": 5e-05, + "loss": 2.1373, + "num_input_tokens_seen": 13247672, + "step": 200 + }, + { + "epoch": 0.01872045677914541, + "loss": 2.325364589691162, + "loss_ce": 0.0030990494415163994, + "loss_iou": 0.9609375, + "loss_num": 0.080078125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 13247672, + "step": 200 + }, + { + "epoch": 0.01881405906304114, + "grad_norm": 24.597557067871094, + "learning_rate": 5e-05, + "loss": 2.5439, + "num_input_tokens_seen": 13313348, + "step": 201 + }, + { + "epoch": 0.01881405906304114, + "loss": 2.637383460998535, + "loss_ce": 0.004570887889713049, + "loss_iou": 1.1640625, + "loss_num": 0.0615234375, + "loss_xval": 2.625, + "num_input_tokens_seen": 13313348, + "step": 201 + }, + { + "epoch": 0.018907661346936864, + "grad_norm": 9.031757354736328, + "learning_rate": 5e-05, + "loss": 2.89, + "num_input_tokens_seen": 13378600, + "step": 202 + }, + { + "epoch": 0.018907661346936864, + "loss": 2.8787484169006348, + "loss_ce": 0.005701353773474693, + "loss_iou": 1.125, + "loss_num": 0.1259765625, + "loss_xval": 2.875, + "num_input_tokens_seen": 13378600, + "step": 202 + }, + { + "epoch": 0.019001263630832592, + "grad_norm": 7.955211639404297, + "learning_rate": 5e-05, + "loss": 2.5635, + "num_input_tokens_seen": 13444416, + "step": 203 + }, + { + "epoch": 0.019001263630832592, + "loss": 2.63411283493042, + "loss_ce": 0.001300313975661993, + "loss_iou": 1.03125, + "loss_num": 0.11328125, + "loss_xval": 2.625, + "num_input_tokens_seen": 13444416, + "step": 203 + }, + { + "epoch": 0.01909486591472832, + "grad_norm": 8.365921020507812, + "learning_rate": 5e-05, + "loss": 2.5201, + "num_input_tokens_seen": 13510788, + "step": 204 + }, + { + "epoch": 0.01909486591472832, + "loss": 2.4572911262512207, + "loss_ce": 0.00221311766654253, + "loss_iou": 0.98046875, + "loss_num": 0.09912109375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 13510788, + "step": 204 + }, + { + "epoch": 0.019188468198624048, + "grad_norm": 11.965587615966797, + "learning_rate": 5e-05, + "loss": 2.3749, + "num_input_tokens_seen": 13575660, + "step": 205 + }, + { + "epoch": 0.019188468198624048, + "loss": 2.3484880924224854, + "loss_ce": 0.008644196204841137, + "loss_iou": 0.95703125, + "loss_num": 0.0859375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 13575660, + "step": 205 + }, + { + "epoch": 0.019282070482519773, + "grad_norm": 18.333850860595703, + "learning_rate": 5e-05, + "loss": 2.3765, + "num_input_tokens_seen": 13641848, + "step": 206 + }, + { + "epoch": 0.019282070482519773, + "loss": 2.342848777770996, + "loss_ce": 0.0030051928479224443, + "loss_iou": 0.99609375, + "loss_num": 0.0693359375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 13641848, + "step": 206 + }, + { + "epoch": 0.0193756727664155, + "grad_norm": 15.698119163513184, + "learning_rate": 5e-05, + "loss": 2.769, + "num_input_tokens_seen": 13708084, + "step": 207 + }, + { + "epoch": 0.0193756727664155, + "loss": 2.7347211837768555, + "loss_ce": 0.0022994456812739372, + "loss_iou": 1.125, + "loss_num": 0.0966796875, + "loss_xval": 2.734375, + "num_input_tokens_seen": 13708084, + "step": 207 + }, + { + "epoch": 0.01946927505031123, + "grad_norm": 13.12657356262207, + "learning_rate": 5e-05, + "loss": 2.3812, + "num_input_tokens_seen": 13774940, + "step": 208 + }, + { + "epoch": 0.01946927505031123, + "loss": 2.2628304958343506, + "loss_ce": 0.005018184892833233, + "loss_iou": 0.953125, + "loss_num": 0.0712890625, + "loss_xval": 2.25, + "num_input_tokens_seen": 13774940, + "step": 208 + }, + { + "epoch": 0.019562877334206953, + "grad_norm": 11.415872573852539, + "learning_rate": 5e-05, + "loss": 2.2971, + "num_input_tokens_seen": 13841564, + "step": 209 + }, + { + "epoch": 0.019562877334206953, + "loss": 2.2718019485473633, + "loss_ce": 0.004223851952701807, + "loss_iou": 0.95703125, + "loss_num": 0.07080078125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 13841564, + "step": 209 + }, + { + "epoch": 0.01965647961810268, + "grad_norm": 22.843965530395508, + "learning_rate": 5e-05, + "loss": 2.0982, + "num_input_tokens_seen": 13907716, + "step": 210 + }, + { + "epoch": 0.01965647961810268, + "loss": 2.142209053039551, + "loss_ce": 0.008419920690357685, + "loss_iou": 0.921875, + "loss_num": 0.05908203125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 13907716, + "step": 210 + }, + { + "epoch": 0.01975008190199841, + "grad_norm": 13.346732139587402, + "learning_rate": 5e-05, + "loss": 2.0266, + "num_input_tokens_seen": 13974352, + "step": 211 + }, + { + "epoch": 0.01975008190199841, + "loss": 2.0488786697387695, + "loss_ce": 0.006886437069624662, + "loss_iou": 0.890625, + "loss_num": 0.052001953125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 13974352, + "step": 211 + }, + { + "epoch": 0.019843684185894137, + "grad_norm": 13.644083023071289, + "learning_rate": 5e-05, + "loss": 2.3802, + "num_input_tokens_seen": 14040576, + "step": 212 + }, + { + "epoch": 0.019843684185894137, + "loss": 2.3828635215759277, + "loss_ce": 0.005910306237637997, + "loss_iou": 1.0, + "loss_num": 0.07568359375, + "loss_xval": 2.375, + "num_input_tokens_seen": 14040576, + "step": 212 + }, + { + "epoch": 0.019937286469789862, + "grad_norm": 16.804580688476562, + "learning_rate": 5e-05, + "loss": 2.2239, + "num_input_tokens_seen": 14106472, + "step": 213 + }, + { + "epoch": 0.019937286469789862, + "loss": 2.160003662109375, + "loss_ce": 0.005218386184424162, + "loss_iou": 0.92578125, + "loss_num": 0.060791015625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 14106472, + "step": 213 + }, + { + "epoch": 0.02003088875368559, + "grad_norm": 12.16080379486084, + "learning_rate": 5e-05, + "loss": 2.5757, + "num_input_tokens_seen": 14174120, + "step": 214 + }, + { + "epoch": 0.02003088875368559, + "loss": 2.5228171348571777, + "loss_ce": 0.0052390191704034805, + "loss_iou": 1.09375, + "loss_num": 0.06591796875, + "loss_xval": 2.515625, + "num_input_tokens_seen": 14174120, + "step": 214 + }, + { + "epoch": 0.020124491037581318, + "grad_norm": 8.559412956237793, + "learning_rate": 5e-05, + "loss": 2.2206, + "num_input_tokens_seen": 14240428, + "step": 215 + }, + { + "epoch": 0.020124491037581318, + "loss": 2.2577953338623047, + "loss_ce": 0.0038892091251909733, + "loss_iou": 0.984375, + "loss_num": 0.057861328125, + "loss_xval": 2.25, + "num_input_tokens_seen": 14240428, + "step": 215 + }, + { + "epoch": 0.020218093321477042, + "grad_norm": 12.233105659484863, + "learning_rate": 5e-05, + "loss": 2.1681, + "num_input_tokens_seen": 14306032, + "step": 216 + }, + { + "epoch": 0.020218093321477042, + "loss": 2.202512502670288, + "loss_ce": 0.005735145881772041, + "loss_iou": 0.890625, + "loss_num": 0.08251953125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 14306032, + "step": 216 + }, + { + "epoch": 0.02031169560537277, + "grad_norm": 28.35647201538086, + "learning_rate": 5e-05, + "loss": 2.464, + "num_input_tokens_seen": 14372164, + "step": 217 + }, + { + "epoch": 0.02031169560537277, + "loss": 2.3463313579559326, + "loss_ce": 0.0025814578402787447, + "loss_iou": 1.0234375, + "loss_num": 0.06005859375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 14372164, + "step": 217 + }, + { + "epoch": 0.0204052978892685, + "grad_norm": 9.605964660644531, + "learning_rate": 5e-05, + "loss": 2.7749, + "num_input_tokens_seen": 14438152, + "step": 218 + }, + { + "epoch": 0.0204052978892685, + "loss": 2.80424165725708, + "loss_ce": 0.0015071036759763956, + "loss_iou": 1.0859375, + "loss_num": 0.125, + "loss_xval": 2.796875, + "num_input_tokens_seen": 14438152, + "step": 218 + }, + { + "epoch": 0.020498900173164226, + "grad_norm": 5.129776477813721, + "learning_rate": 5e-05, + "loss": 2.5083, + "num_input_tokens_seen": 14503776, + "step": 219 + }, + { + "epoch": 0.020498900173164226, + "loss": 2.6203508377075195, + "loss_ce": 0.005116347223520279, + "loss_iou": 1.0546875, + "loss_num": 0.1025390625, + "loss_xval": 2.609375, + "num_input_tokens_seen": 14503776, + "step": 219 + }, + { + "epoch": 0.02059250245705995, + "grad_norm": 7.953933238983154, + "learning_rate": 5e-05, + "loss": 2.394, + "num_input_tokens_seen": 14570076, + "step": 220 + }, + { + "epoch": 0.02059250245705995, + "loss": 2.503218650817871, + "loss_ce": 0.007124858908355236, + "loss_iou": 0.98828125, + "loss_num": 0.10400390625, + "loss_xval": 2.5, + "num_input_tokens_seen": 14570076, + "step": 220 + }, + { + "epoch": 0.02068610474095568, + "grad_norm": 46.45488739013672, + "learning_rate": 5e-05, + "loss": 2.3549, + "num_input_tokens_seen": 14636444, + "step": 221 + }, + { + "epoch": 0.02068610474095568, + "loss": 2.2976410388946533, + "loss_ce": 0.00271901348605752, + "loss_iou": 0.953125, + "loss_num": 0.07861328125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 14636444, + "step": 221 + }, + { + "epoch": 0.020779707024851407, + "grad_norm": 20.381505966186523, + "learning_rate": 5e-05, + "loss": 2.2723, + "num_input_tokens_seen": 14703504, + "step": 222 + }, + { + "epoch": 0.020779707024851407, + "loss": 2.2910914421081543, + "loss_ce": 0.002028927905485034, + "loss_iou": 0.9921875, + "loss_num": 0.0615234375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 14703504, + "step": 222 + }, + { + "epoch": 0.020873309308747135, + "grad_norm": 4.785074710845947, + "learning_rate": 5e-05, + "loss": 2.3778, + "num_input_tokens_seen": 14770508, + "step": 223 + }, + { + "epoch": 0.020873309308747135, + "loss": 2.339602470397949, + "loss_ce": 0.004641698207706213, + "loss_iou": 0.9765625, + "loss_num": 0.07666015625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 14770508, + "step": 223 + }, + { + "epoch": 0.02096691159264286, + "grad_norm": 9.842256546020508, + "learning_rate": 5e-05, + "loss": 2.1408, + "num_input_tokens_seen": 14836768, + "step": 224 + }, + { + "epoch": 0.02096691159264286, + "loss": 2.0448293685913086, + "loss_ce": 0.00405783299356699, + "loss_iou": 0.8515625, + "loss_num": 0.06689453125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 14836768, + "step": 224 + }, + { + "epoch": 0.021060513876538588, + "grad_norm": 9.726057052612305, + "learning_rate": 5e-05, + "loss": 2.2071, + "num_input_tokens_seen": 14901788, + "step": 225 + }, + { + "epoch": 0.021060513876538588, + "loss": 2.148120164871216, + "loss_ce": 0.0035889961291104555, + "loss_iou": 0.91015625, + "loss_num": 0.06396484375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 14901788, + "step": 225 + }, + { + "epoch": 0.021154116160434316, + "grad_norm": 29.221086502075195, + "learning_rate": 5e-05, + "loss": 2.1879, + "num_input_tokens_seen": 14967428, + "step": 226 + }, + { + "epoch": 0.021154116160434316, + "loss": 2.3783063888549805, + "loss_ce": 0.005259726196527481, + "loss_iou": 1.0546875, + "loss_num": 0.0537109375, + "loss_xval": 2.375, + "num_input_tokens_seen": 14967428, + "step": 226 + }, + { + "epoch": 0.02124771844433004, + "grad_norm": 24.138704299926758, + "learning_rate": 5e-05, + "loss": 2.9276, + "num_input_tokens_seen": 15034212, + "step": 227 + }, + { + "epoch": 0.02124771844433004, + "loss": 2.9417781829833984, + "loss_ce": 0.002325114095583558, + "loss_iou": 1.1328125, + "loss_num": 0.1357421875, + "loss_xval": 2.9375, + "num_input_tokens_seen": 15034212, + "step": 227 + }, + { + "epoch": 0.02134132072822577, + "grad_norm": 4.556021690368652, + "learning_rate": 5e-05, + "loss": 2.7162, + "num_input_tokens_seen": 15099904, + "step": 228 + }, + { + "epoch": 0.02134132072822577, + "loss": 2.652596950531006, + "loss_ce": 0.013925191015005112, + "loss_iou": 0.984375, + "loss_num": 0.1337890625, + "loss_xval": 2.640625, + "num_input_tokens_seen": 15099904, + "step": 228 + }, + { + "epoch": 0.021434923012121496, + "grad_norm": 11.66744613647461, + "learning_rate": 5e-05, + "loss": 2.5639, + "num_input_tokens_seen": 15166456, + "step": 229 + }, + { + "epoch": 0.021434923012121496, + "loss": 2.5610897541046143, + "loss_ce": 0.0064022233709692955, + "loss_iou": 1.0078125, + "loss_num": 0.107421875, + "loss_xval": 2.5625, + "num_input_tokens_seen": 15166456, + "step": 229 + }, + { + "epoch": 0.021528525296017224, + "grad_norm": 3.8487696647644043, + "learning_rate": 5e-05, + "loss": 2.5557, + "num_input_tokens_seen": 15232448, + "step": 230 + }, + { + "epoch": 0.021528525296017224, + "loss": 2.4851675033569336, + "loss_ce": 0.01446419395506382, + "loss_iou": 1.0, + "loss_num": 0.0947265625, + "loss_xval": 2.46875, + "num_input_tokens_seen": 15232448, + "step": 230 + }, + { + "epoch": 0.02162212757991295, + "grad_norm": 6.852122783660889, + "learning_rate": 5e-05, + "loss": 2.4145, + "num_input_tokens_seen": 15299076, + "step": 231 + }, + { + "epoch": 0.02162212757991295, + "loss": 2.2217366695404053, + "loss_ce": 0.006648796610534191, + "loss_iou": 0.87109375, + "loss_num": 0.09375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 15299076, + "step": 231 + }, + { + "epoch": 0.021715729863808677, + "grad_norm": 10.094616889953613, + "learning_rate": 5e-05, + "loss": 2.3538, + "num_input_tokens_seen": 15365272, + "step": 232 + }, + { + "epoch": 0.021715729863808677, + "loss": 2.340567111968994, + "loss_ce": 0.006582723464816809, + "loss_iou": 0.96484375, + "loss_num": 0.080078125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 15365272, + "step": 232 + }, + { + "epoch": 0.021809332147704405, + "grad_norm": 14.189200401306152, + "learning_rate": 5e-05, + "loss": 2.4126, + "num_input_tokens_seen": 15431180, + "step": 233 + }, + { + "epoch": 0.021809332147704405, + "loss": 2.4369239807128906, + "loss_ce": 0.0013770000077784061, + "loss_iou": 1.015625, + "loss_num": 0.08056640625, + "loss_xval": 2.4375, + "num_input_tokens_seen": 15431180, + "step": 233 + }, + { + "epoch": 0.02190293443160013, + "grad_norm": 16.224164962768555, + "learning_rate": 5e-05, + "loss": 2.6631, + "num_input_tokens_seen": 15498748, + "step": 234 + }, + { + "epoch": 0.02190293443160013, + "loss": 2.6548266410827637, + "loss_ce": 0.004436132963746786, + "loss_iou": 1.1328125, + "loss_num": 0.076171875, + "loss_xval": 2.65625, + "num_input_tokens_seen": 15498748, + "step": 234 + }, + { + "epoch": 0.021996536715495858, + "grad_norm": 9.499267578125, + "learning_rate": 5e-05, + "loss": 2.5154, + "num_input_tokens_seen": 15565392, + "step": 235 + }, + { + "epoch": 0.021996536715495858, + "loss": 2.5411789417266846, + "loss_ce": 0.0021164612844586372, + "loss_iou": 1.015625, + "loss_num": 0.1015625, + "loss_xval": 2.53125, + "num_input_tokens_seen": 15565392, + "step": 235 + }, + { + "epoch": 0.022090138999391586, + "grad_norm": 5.71075963973999, + "learning_rate": 5e-05, + "loss": 2.2016, + "num_input_tokens_seen": 15632800, + "step": 236 + }, + { + "epoch": 0.022090138999391586, + "loss": 2.220017910003662, + "loss_ce": 0.0012678343337029219, + "loss_iou": 0.9453125, + "loss_num": 0.06640625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 15632800, + "step": 236 + }, + { + "epoch": 0.022183741283287314, + "grad_norm": 11.757972717285156, + "learning_rate": 5e-05, + "loss": 2.3164, + "num_input_tokens_seen": 15698656, + "step": 237 + }, + { + "epoch": 0.022183741283287314, + "loss": 2.343291997909546, + "loss_ce": 0.003448127768933773, + "loss_iou": 1.0078125, + "loss_num": 0.06298828125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 15698656, + "step": 237 + }, + { + "epoch": 0.022277343567183038, + "grad_norm": 8.407330513000488, + "learning_rate": 5e-05, + "loss": 2.3152, + "num_input_tokens_seen": 15764320, + "step": 238 + }, + { + "epoch": 0.022277343567183038, + "loss": 2.46926212310791, + "loss_ce": 0.00832437165081501, + "loss_iou": 1.0234375, + "loss_num": 0.08154296875, + "loss_xval": 2.46875, + "num_input_tokens_seen": 15764320, + "step": 238 + }, + { + "epoch": 0.022370945851078766, + "grad_norm": 10.095173835754395, + "learning_rate": 5e-05, + "loss": 2.2786, + "num_input_tokens_seen": 15830768, + "step": 239 + }, + { + "epoch": 0.022370945851078766, + "loss": 2.2201032638549805, + "loss_ce": 0.008189301937818527, + "loss_iou": 0.9296875, + "loss_num": 0.0712890625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 15830768, + "step": 239 + }, + { + "epoch": 0.022464548134974494, + "grad_norm": 31.006059646606445, + "learning_rate": 5e-05, + "loss": 2.4645, + "num_input_tokens_seen": 15896844, + "step": 240 + }, + { + "epoch": 0.022464548134974494, + "loss": 2.4931678771972656, + "loss_ce": 0.010745798237621784, + "loss_iou": 1.09375, + "loss_num": 0.060302734375, + "loss_xval": 2.484375, + "num_input_tokens_seen": 15896844, + "step": 240 + }, + { + "epoch": 0.02255815041887022, + "grad_norm": 25.16346549987793, + "learning_rate": 5e-05, + "loss": 2.8696, + "num_input_tokens_seen": 15964128, + "step": 241 + }, + { + "epoch": 0.02255815041887022, + "loss": 2.869755268096924, + "loss_ce": 0.005497458856552839, + "loss_iou": 1.09375, + "loss_num": 0.1337890625, + "loss_xval": 2.859375, + "num_input_tokens_seen": 15964128, + "step": 241 + }, + { + "epoch": 0.022651752702765947, + "grad_norm": 5.70390510559082, + "learning_rate": 5e-05, + "loss": 2.7806, + "num_input_tokens_seen": 16030220, + "step": 242 + }, + { + "epoch": 0.022651752702765947, + "loss": 2.78690242767334, + "loss_ce": 0.0036993513349443674, + "loss_iou": 1.109375, + "loss_num": 0.111328125, + "loss_xval": 2.78125, + "num_input_tokens_seen": 16030220, + "step": 242 + }, + { + "epoch": 0.022745354986661675, + "grad_norm": 4.852331638336182, + "learning_rate": 5e-05, + "loss": 2.7649, + "num_input_tokens_seen": 16097076, + "step": 243 + }, + { + "epoch": 0.022745354986661675, + "loss": 2.715427875518799, + "loss_ce": 0.00449032848700881, + "loss_iou": 1.0859375, + "loss_num": 0.10791015625, + "loss_xval": 2.71875, + "num_input_tokens_seen": 16097076, + "step": 243 + }, + { + "epoch": 0.022838957270557403, + "grad_norm": 8.202338218688965, + "learning_rate": 5e-05, + "loss": 2.4259, + "num_input_tokens_seen": 16163904, + "step": 244 + }, + { + "epoch": 0.022838957270557403, + "loss": 2.4780466556549072, + "loss_ce": 0.00539036188274622, + "loss_iou": 1.0, + "loss_num": 0.095703125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 16163904, + "step": 244 + }, + { + "epoch": 0.022932559554453127, + "grad_norm": 27.955408096313477, + "learning_rate": 5e-05, + "loss": 2.5161, + "num_input_tokens_seen": 16230508, + "step": 245 + }, + { + "epoch": 0.022932559554453127, + "loss": 2.4997286796569824, + "loss_ce": 0.009494267404079437, + "loss_iou": 1.046875, + "loss_num": 0.07861328125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 16230508, + "step": 245 + }, + { + "epoch": 0.023026161838348855, + "grad_norm": 6.299352645874023, + "learning_rate": 5e-05, + "loss": 2.3895, + "num_input_tokens_seen": 16296232, + "step": 246 + }, + { + "epoch": 0.023026161838348855, + "loss": 2.376498222351074, + "loss_ce": 0.0036955017130821943, + "loss_iou": 0.96875, + "loss_num": 0.087890625, + "loss_xval": 2.375, + "num_input_tokens_seen": 16296232, + "step": 246 + }, + { + "epoch": 0.023119764122244583, + "grad_norm": 47.97476577758789, + "learning_rate": 5e-05, + "loss": 2.2768, + "num_input_tokens_seen": 16364056, + "step": 247 + }, + { + "epoch": 0.023119764122244583, + "loss": 2.3292150497436523, + "loss_ce": 0.004019611515104771, + "loss_iou": 0.98046875, + "loss_num": 0.07177734375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 16364056, + "step": 247 + }, + { + "epoch": 0.02321336640614031, + "grad_norm": 12.00546932220459, + "learning_rate": 5e-05, + "loss": 2.109, + "num_input_tokens_seen": 16429512, + "step": 248 + }, + { + "epoch": 0.02321336640614031, + "loss": 2.0103962421417236, + "loss_ce": 0.0033161009196192026, + "loss_iou": 0.84375, + "loss_num": 0.0634765625, + "loss_xval": 2.0, + "num_input_tokens_seen": 16429512, + "step": 248 + }, + { + "epoch": 0.023306968690036036, + "grad_norm": 13.863018989562988, + "learning_rate": 5e-05, + "loss": 2.293, + "num_input_tokens_seen": 16497392, + "step": 249 + }, + { + "epoch": 0.023306968690036036, + "loss": 2.2520527839660645, + "loss_ce": 0.0020527015440165997, + "loss_iou": 1.0, + "loss_num": 0.048828125, + "loss_xval": 2.25, + "num_input_tokens_seen": 16497392, + "step": 249 + }, + { + "epoch": 0.023400570973931764, + "grad_norm": 12.705256462097168, + "learning_rate": 5e-05, + "loss": 2.6155, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "eval_seeclick_CIoU": -0.10535179451107979, + "eval_seeclick_GIoU": -0.1347958855330944, + "eval_seeclick_IoU": 0.08255543187260628, + "eval_seeclick_MAE_all": 0.19345863908529282, + "eval_seeclick_MAE_h": 0.22437167167663574, + "eval_seeclick_MAE_w": 0.17006926238536835, + "eval_seeclick_MAE_x_boxes": 0.2265409231185913, + "eval_seeclick_MAE_y_boxes": 0.16204872727394104, + "eval_seeclick_NUM_probability": 0.9990282654762268, + "eval_seeclick_inside_bbox": 0.14479167014360428, + "eval_seeclick_loss": 3.3097572326660156, + "eval_seeclick_loss_ce": 0.04021553136408329, + "eval_seeclick_loss_iou": 1.138916015625, + "eval_seeclick_loss_num": 0.20947265625, + "eval_seeclick_loss_xval": 3.326171875, + "eval_seeclick_runtime": 65.061, + "eval_seeclick_samples_per_second": 0.722, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "eval_icons_CIoU": -0.126484464854002, + "eval_icons_GIoU": -0.08203662559390068, + "eval_icons_IoU": 0.012108168564736843, + "eval_icons_MAE_all": 0.2220921739935875, + "eval_icons_MAE_h": 0.2853976711630821, + "eval_icons_MAE_w": 0.15436376631259918, + "eval_icons_MAE_x_boxes": 0.15638123452663422, + "eval_icons_MAE_y_boxes": 0.1470625400543213, + "eval_icons_NUM_probability": 0.9994451999664307, + "eval_icons_inside_bbox": 0.04340277798473835, + "eval_icons_loss": 3.319797992706299, + "eval_icons_loss_ce": 0.002282801433466375, + "eval_icons_loss_iou": 1.099853515625, + "eval_icons_loss_num": 0.230224609375, + "eval_icons_loss_xval": 3.3525390625, + "eval_icons_runtime": 71.6355, + "eval_icons_samples_per_second": 0.698, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "eval_screenspot_CIoU": -0.010043622304995855, + "eval_screenspot_GIoU": -0.021931735177834828, + "eval_screenspot_IoU": 0.08838420361280441, + "eval_screenspot_MAE_all": 0.17027866343657175, + "eval_screenspot_MAE_h": 0.19475771486759186, + "eval_screenspot_MAE_w": 0.1290009394288063, + "eval_screenspot_MAE_x_boxes": 0.16163779298464456, + "eval_screenspot_MAE_y_boxes": 0.11051993568738301, + "eval_screenspot_NUM_probability": 0.9991798202196757, + "eval_screenspot_inside_bbox": 0.20499999821186066, + "eval_screenspot_loss": 2.9312689304351807, + "eval_screenspot_loss_ce": 0.012030040534834066, + "eval_screenspot_loss_iou": 1.0362955729166667, + "eval_screenspot_loss_num": 0.17974853515625, + "eval_screenspot_loss_xval": 2.9694010416666665, + "eval_screenspot_runtime": 120.7494, + "eval_screenspot_samples_per_second": 0.737, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "eval_compot_CIoU": -0.059945203363895416, + "eval_compot_GIoU": -0.0336238956078887, + "eval_compot_IoU": 0.021148216910660267, + "eval_compot_MAE_all": 0.1428975909948349, + "eval_compot_MAE_h": 0.17755433917045593, + "eval_compot_MAE_w": 0.10511288791894913, + "eval_compot_MAE_x_boxes": 0.10315332189202309, + "eval_compot_MAE_y_boxes": 0.09815165773034096, + "eval_compot_NUM_probability": 0.9983721375465393, + "eval_compot_inside_bbox": 0.0711805559694767, + "eval_compot_loss": 2.7843072414398193, + "eval_compot_loss_ce": 0.011173189617693424, + "eval_compot_loss_iou": 1.02001953125, + "eval_compot_loss_num": 0.146759033203125, + "eval_compot_loss_xval": 2.7734375, + "eval_compot_runtime": 66.2387, + "eval_compot_samples_per_second": 0.755, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "eval_custom_ui_MAE_all": 0.12166675180196762, + "eval_custom_ui_MAE_x": 0.11208774521946907, + "eval_custom_ui_MAE_y": 0.13124577701091766, + "eval_custom_ui_NUM_probability": 0.999786376953125, + "eval_custom_ui_loss": 0.6037883162498474, + "eval_custom_ui_loss_ce": 0.017134987749159336, + "eval_custom_ui_loss_num": 0.11712646484375, + "eval_custom_ui_loss_xval": 0.585693359375, + "eval_custom_ui_runtime": 53.7774, + "eval_custom_ui_samples_per_second": 0.93, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023400570973931764, + "loss": 0.5946378707885742, + "loss_ce": 0.018710199743509293, + "loss_iou": 0.0, + "loss_num": 0.115234375, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 16563512, + "step": 250 + }, + { + "epoch": 0.023494173257827492, + "grad_norm": 6.045687198638916, + "learning_rate": 5e-05, + "loss": 1.8794, + "num_input_tokens_seen": 16630008, + "step": 251 + }, + { + "epoch": 0.023494173257827492, + "loss": 1.9556176662445068, + "loss_ce": 0.004201718606054783, + "loss_iou": 0.79296875, + "loss_num": 0.07373046875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 16630008, + "step": 251 + }, + { + "epoch": 0.023587775541723217, + "grad_norm": 11.652396202087402, + "learning_rate": 5e-05, + "loss": 2.1469, + "num_input_tokens_seen": 16696696, + "step": 252 + }, + { + "epoch": 0.023587775541723217, + "loss": 2.2387962341308594, + "loss_ce": 0.011257155798375607, + "loss_iou": 0.9453125, + "loss_num": 0.06787109375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 16696696, + "step": 252 + }, + { + "epoch": 0.023681377825618945, + "grad_norm": 9.669051170349121, + "learning_rate": 5e-05, + "loss": 2.4593, + "num_input_tokens_seen": 16763824, + "step": 253 + }, + { + "epoch": 0.023681377825618945, + "loss": 2.461796760559082, + "loss_ce": 0.006718586664646864, + "loss_iou": 1.03125, + "loss_num": 0.07763671875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 16763824, + "step": 253 + }, + { + "epoch": 0.023774980109514673, + "grad_norm": 6.617365837097168, + "learning_rate": 5e-05, + "loss": 2.3379, + "num_input_tokens_seen": 16830448, + "step": 254 + }, + { + "epoch": 0.023774980109514673, + "loss": 2.5503010749816895, + "loss_ce": 0.005379147361963987, + "loss_iou": 1.0234375, + "loss_num": 0.0986328125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 16830448, + "step": 254 + }, + { + "epoch": 0.0238685823934104, + "grad_norm": 12.18752670288086, + "learning_rate": 5e-05, + "loss": 2.3084, + "num_input_tokens_seen": 16896776, + "step": 255 + }, + { + "epoch": 0.0238685823934104, + "loss": 2.2040317058563232, + "loss_ce": 0.005789526738226414, + "loss_iou": 0.9609375, + "loss_num": 0.0556640625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 16896776, + "step": 255 + }, + { + "epoch": 0.023962184677306125, + "grad_norm": 8.434796333312988, + "learning_rate": 5e-05, + "loss": 2.4458, + "num_input_tokens_seen": 16964024, + "step": 256 + }, + { + "epoch": 0.023962184677306125, + "loss": 2.423982620239258, + "loss_ce": 0.006013859994709492, + "loss_iou": 0.98046875, + "loss_num": 0.09130859375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 16964024, + "step": 256 + }, + { + "epoch": 0.024055786961201853, + "grad_norm": 25.34197235107422, + "learning_rate": 5e-05, + "loss": 1.9667, + "num_input_tokens_seen": 17030448, + "step": 257 + }, + { + "epoch": 0.024055786961201853, + "loss": 2.267993927001953, + "loss_ce": 0.005298535339534283, + "loss_iou": 0.9609375, + "loss_num": 0.0693359375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 17030448, + "step": 257 + }, + { + "epoch": 0.02414938924509758, + "grad_norm": 20.773479461669922, + "learning_rate": 5e-05, + "loss": 2.4686, + "num_input_tokens_seen": 17097300, + "step": 258 + }, + { + "epoch": 0.02414938924509758, + "loss": 2.5461835861206055, + "loss_ce": 0.006144314538687468, + "loss_iou": 1.0625, + "loss_num": 0.08203125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 17097300, + "step": 258 + }, + { + "epoch": 0.024242991528993306, + "grad_norm": 13.068894386291504, + "learning_rate": 5e-05, + "loss": 2.4266, + "num_input_tokens_seen": 17163384, + "step": 259 + }, + { + "epoch": 0.024242991528993306, + "loss": 2.4044408798217773, + "loss_ce": 0.004050274379551411, + "loss_iou": 1.0, + "loss_num": 0.0791015625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 17163384, + "step": 259 + }, + { + "epoch": 0.024336593812889034, + "grad_norm": 8.591397285461426, + "learning_rate": 5e-05, + "loss": 2.0286, + "num_input_tokens_seen": 17229192, + "step": 260 + }, + { + "epoch": 0.024336593812889034, + "loss": 2.1226484775543213, + "loss_ce": 0.0064375754445791245, + "loss_iou": 0.8828125, + "loss_num": 0.0703125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 17229192, + "step": 260 + }, + { + "epoch": 0.024430196096784762, + "grad_norm": 21.167705535888672, + "learning_rate": 5e-05, + "loss": 2.0875, + "num_input_tokens_seen": 17295184, + "step": 261 + }, + { + "epoch": 0.024430196096784762, + "loss": 2.235891342163086, + "loss_ce": 0.005422517191618681, + "loss_iou": 0.9609375, + "loss_num": 0.0615234375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 17295184, + "step": 261 + }, + { + "epoch": 0.02452379838068049, + "grad_norm": 34.061092376708984, + "learning_rate": 5e-05, + "loss": 2.2078, + "num_input_tokens_seen": 17361696, + "step": 262 + }, + { + "epoch": 0.02452379838068049, + "loss": 2.2449076175689697, + "loss_ce": 0.004673250950872898, + "loss_iou": 0.9765625, + "loss_num": 0.056396484375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 17361696, + "step": 262 + }, + { + "epoch": 0.024617400664576215, + "grad_norm": 10.199859619140625, + "learning_rate": 5e-05, + "loss": 2.5873, + "num_input_tokens_seen": 17428492, + "step": 263 + }, + { + "epoch": 0.024617400664576215, + "loss": 2.6498115062713623, + "loss_ce": 0.005280329845845699, + "loss_iou": 1.109375, + "loss_num": 0.08642578125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 17428492, + "step": 263 + }, + { + "epoch": 0.024711002948471943, + "grad_norm": 68.19354248046875, + "learning_rate": 5e-05, + "loss": 2.1439, + "num_input_tokens_seen": 17495072, + "step": 264 + }, + { + "epoch": 0.024711002948471943, + "loss": 1.9357881546020508, + "loss_ce": 0.005612347275018692, + "loss_iou": 0.8203125, + "loss_num": 0.056640625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 17495072, + "step": 264 + }, + { + "epoch": 0.02480460523236767, + "grad_norm": 21.633960723876953, + "learning_rate": 5e-05, + "loss": 1.9389, + "num_input_tokens_seen": 17560868, + "step": 265 + }, + { + "epoch": 0.02480460523236767, + "loss": 2.2004802227020264, + "loss_ce": 0.005167837254703045, + "loss_iou": 0.96875, + "loss_num": 0.051025390625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 17560868, + "step": 265 + }, + { + "epoch": 0.024898207516263395, + "grad_norm": 13.91876220703125, + "learning_rate": 5e-05, + "loss": 2.563, + "num_input_tokens_seen": 17626452, + "step": 266 + }, + { + "epoch": 0.024898207516263395, + "loss": 2.716935157775879, + "loss_ce": 0.0020913132466375828, + "loss_iou": 1.09375, + "loss_num": 0.1064453125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 17626452, + "step": 266 + }, + { + "epoch": 0.024991809800159123, + "grad_norm": 6.295694828033447, + "learning_rate": 5e-05, + "loss": 2.582, + "num_input_tokens_seen": 17693108, + "step": 267 + }, + { + "epoch": 0.024991809800159123, + "loss": 2.455904483795166, + "loss_ce": 0.002779476810246706, + "loss_iou": 1.015625, + "loss_num": 0.08447265625, + "loss_xval": 2.453125, + "num_input_tokens_seen": 17693108, + "step": 267 + }, + { + "epoch": 0.02508541208405485, + "grad_norm": 7.04426383972168, + "learning_rate": 5e-05, + "loss": 2.3078, + "num_input_tokens_seen": 17759324, + "step": 268 + }, + { + "epoch": 0.02508541208405485, + "loss": 2.317918300628662, + "loss_ce": 0.005906580947339535, + "loss_iou": 0.9140625, + "loss_num": 0.09716796875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 17759324, + "step": 268 + }, + { + "epoch": 0.02517901436795058, + "grad_norm": 13.625930786132812, + "learning_rate": 5e-05, + "loss": 2.2721, + "num_input_tokens_seen": 17826488, + "step": 269 + }, + { + "epoch": 0.02517901436795058, + "loss": 2.192628860473633, + "loss_ce": 0.003175870981067419, + "loss_iou": 0.9453125, + "loss_num": 0.05908203125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 17826488, + "step": 269 + }, + { + "epoch": 0.025272616651846304, + "grad_norm": 12.6675443649292, + "learning_rate": 5e-05, + "loss": 2.3457, + "num_input_tokens_seen": 17892828, + "step": 270 + }, + { + "epoch": 0.025272616651846304, + "loss": 2.5291335582733154, + "loss_ce": 0.0017897638026624918, + "loss_iou": 1.0703125, + "loss_num": 0.076171875, + "loss_xval": 2.53125, + "num_input_tokens_seen": 17892828, + "step": 270 + }, + { + "epoch": 0.025366218935742032, + "grad_norm": 15.294829368591309, + "learning_rate": 5e-05, + "loss": 1.9474, + "num_input_tokens_seen": 17959500, + "step": 271 + }, + { + "epoch": 0.025366218935742032, + "loss": 1.862996220588684, + "loss_ce": 0.0025225295685231686, + "loss_iou": 0.7890625, + "loss_num": 0.0576171875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 17959500, + "step": 271 + }, + { + "epoch": 0.02545982121963776, + "grad_norm": 11.249760627746582, + "learning_rate": 5e-05, + "loss": 2.192, + "num_input_tokens_seen": 18025824, + "step": 272 + }, + { + "epoch": 0.02545982121963776, + "loss": 2.178846836090088, + "loss_ce": 0.002089033368974924, + "loss_iou": 0.9453125, + "loss_num": 0.056884765625, + "loss_xval": 2.171875, + "num_input_tokens_seen": 18025824, + "step": 272 + }, + { + "epoch": 0.025553423503533488, + "grad_norm": 8.352343559265137, + "learning_rate": 5e-05, + "loss": 2.4752, + "num_input_tokens_seen": 18092556, + "step": 273 + }, + { + "epoch": 0.025553423503533488, + "loss": 2.6020169258117676, + "loss_ce": 0.006313713733106852, + "loss_iou": 1.046875, + "loss_num": 0.099609375, + "loss_xval": 2.59375, + "num_input_tokens_seen": 18092556, + "step": 273 + }, + { + "epoch": 0.025647025787429212, + "grad_norm": 6.692131042480469, + "learning_rate": 5e-05, + "loss": 2.2705, + "num_input_tokens_seen": 18158372, + "step": 274 + }, + { + "epoch": 0.025647025787429212, + "loss": 2.384133815765381, + "loss_ce": 0.0071808514185249805, + "loss_iou": 0.9765625, + "loss_num": 0.08447265625, + "loss_xval": 2.375, + "num_input_tokens_seen": 18158372, + "step": 274 + }, + { + "epoch": 0.02574062807132494, + "grad_norm": 8.52961254119873, + "learning_rate": 5e-05, + "loss": 2.0383, + "num_input_tokens_seen": 18223328, + "step": 275 + }, + { + "epoch": 0.02574062807132494, + "loss": 1.9305155277252197, + "loss_ce": 0.00473435502499342, + "loss_iou": 0.83203125, + "loss_num": 0.052001953125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 18223328, + "step": 275 + }, + { + "epoch": 0.02583423035522067, + "grad_norm": 29.442432403564453, + "learning_rate": 5e-05, + "loss": 2.4488, + "num_input_tokens_seen": 18288760, + "step": 276 + }, + { + "epoch": 0.02583423035522067, + "loss": 2.27962589263916, + "loss_ce": 0.0032587742898613214, + "loss_iou": 1.0234375, + "loss_num": 0.045654296875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 18288760, + "step": 276 + }, + { + "epoch": 0.025927832639116393, + "grad_norm": 8.034605026245117, + "learning_rate": 5e-05, + "loss": 2.8575, + "num_input_tokens_seen": 18355144, + "step": 277 + }, + { + "epoch": 0.025927832639116393, + "loss": 2.8461673259735107, + "loss_ce": 0.0063237641006708145, + "loss_iou": 1.125, + "loss_num": 0.11962890625, + "loss_xval": 2.84375, + "num_input_tokens_seen": 18355144, + "step": 277 + }, + { + "epoch": 0.02602143492301212, + "grad_norm": 5.831605434417725, + "learning_rate": 5e-05, + "loss": 2.653, + "num_input_tokens_seen": 18422816, + "step": 278 + }, + { + "epoch": 0.02602143492301212, + "loss": 2.581786632537842, + "loss_ce": 0.0036615575663745403, + "loss_iou": 1.015625, + "loss_num": 0.10791015625, + "loss_xval": 2.578125, + "num_input_tokens_seen": 18422816, + "step": 278 + }, + { + "epoch": 0.02611503720690785, + "grad_norm": 6.519211292266846, + "learning_rate": 5e-05, + "loss": 2.2615, + "num_input_tokens_seen": 18488748, + "step": 279 + }, + { + "epoch": 0.02611503720690785, + "loss": 2.423325538635254, + "loss_ce": 0.004380151629447937, + "loss_iou": 0.9765625, + "loss_num": 0.09375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 18488748, + "step": 279 + }, + { + "epoch": 0.026208639490803577, + "grad_norm": 8.102645874023438, + "learning_rate": 5e-05, + "loss": 2.273, + "num_input_tokens_seen": 18554808, + "step": 280 + }, + { + "epoch": 0.026208639490803577, + "loss": 2.238867998123169, + "loss_ce": 0.006446137558668852, + "loss_iou": 0.9375, + "loss_num": 0.0712890625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 18554808, + "step": 280 + }, + { + "epoch": 0.026302241774699302, + "grad_norm": 7.74899959564209, + "learning_rate": 5e-05, + "loss": 2.0421, + "num_input_tokens_seen": 18621248, + "step": 281 + }, + { + "epoch": 0.026302241774699302, + "loss": 1.935258388519287, + "loss_ce": 0.015092317946255207, + "loss_iou": 0.80078125, + "loss_num": 0.064453125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 18621248, + "step": 281 + }, + { + "epoch": 0.02639584405859503, + "grad_norm": 30.35383415222168, + "learning_rate": 5e-05, + "loss": 2.1578, + "num_input_tokens_seen": 18687824, + "step": 282 + }, + { + "epoch": 0.02639584405859503, + "loss": 2.1037988662719727, + "loss_ce": 0.01102550607174635, + "loss_iou": 0.890625, + "loss_num": 0.0615234375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 18687824, + "step": 282 + }, + { + "epoch": 0.026489446342490758, + "grad_norm": 9.503483772277832, + "learning_rate": 5e-05, + "loss": 2.6332, + "num_input_tokens_seen": 18753680, + "step": 283 + }, + { + "epoch": 0.026489446342490758, + "loss": 2.821577787399292, + "loss_ce": 0.003218400292098522, + "loss_iou": 1.1875, + "loss_num": 0.08837890625, + "loss_xval": 2.8125, + "num_input_tokens_seen": 18753680, + "step": 283 + }, + { + "epoch": 0.026583048626386482, + "grad_norm": 10.875184059143066, + "learning_rate": 5e-05, + "loss": 2.2969, + "num_input_tokens_seen": 18819844, + "step": 284 + }, + { + "epoch": 0.026583048626386482, + "loss": 2.3365728855133057, + "loss_ce": 0.005518095567822456, + "loss_iou": 0.984375, + "loss_num": 0.072265625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 18819844, + "step": 284 + }, + { + "epoch": 0.02667665091028221, + "grad_norm": 9.263823509216309, + "learning_rate": 5e-05, + "loss": 1.9969, + "num_input_tokens_seen": 18885644, + "step": 285 + }, + { + "epoch": 0.02667665091028221, + "loss": 1.907745122909546, + "loss_ce": 0.0014951556222513318, + "loss_iou": 0.8203125, + "loss_num": 0.05224609375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 18885644, + "step": 285 + }, + { + "epoch": 0.02677025319417794, + "grad_norm": 29.129364013671875, + "learning_rate": 5e-05, + "loss": 2.6109, + "num_input_tokens_seen": 18950844, + "step": 286 + }, + { + "epoch": 0.02677025319417794, + "loss": 2.4338810443878174, + "loss_ce": 0.003705211216583848, + "loss_iou": 1.0859375, + "loss_num": 0.052001953125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 18950844, + "step": 286 + }, + { + "epoch": 0.026863855478073666, + "grad_norm": 17.17812728881836, + "learning_rate": 5e-05, + "loss": 2.8428, + "num_input_tokens_seen": 19016132, + "step": 287 + }, + { + "epoch": 0.026863855478073666, + "loss": 3.132232189178467, + "loss_ce": 0.005278948228806257, + "loss_iou": 1.2421875, + "loss_num": 0.12890625, + "loss_xval": 3.125, + "num_input_tokens_seen": 19016132, + "step": 287 + }, + { + "epoch": 0.02695745776196939, + "grad_norm": 8.610562324523926, + "learning_rate": 5e-05, + "loss": 2.2594, + "num_input_tokens_seen": 19081500, + "step": 288 + }, + { + "epoch": 0.02695745776196939, + "loss": 2.117274761199951, + "loss_ce": 0.007167115341871977, + "loss_iou": 0.81640625, + "loss_num": 0.0947265625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 19081500, + "step": 288 + }, + { + "epoch": 0.02705106004586512, + "grad_norm": 6.186338901519775, + "learning_rate": 5e-05, + "loss": 2.6048, + "num_input_tokens_seen": 19147244, + "step": 289 + }, + { + "epoch": 0.02705106004586512, + "loss": 2.6463186740875244, + "loss_ce": 0.007646896876394749, + "loss_iou": 1.0625, + "loss_num": 0.10205078125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 19147244, + "step": 289 + }, + { + "epoch": 0.027144662329760847, + "grad_norm": 9.07485294342041, + "learning_rate": 5e-05, + "loss": 2.3572, + "num_input_tokens_seen": 19215032, + "step": 290 + }, + { + "epoch": 0.027144662329760847, + "loss": 2.259942054748535, + "loss_ce": 0.0060356780886650085, + "loss_iou": 0.95703125, + "loss_num": 0.06884765625, + "loss_xval": 2.25, + "num_input_tokens_seen": 19215032, + "step": 290 + }, + { + "epoch": 0.02723826461365657, + "grad_norm": 8.642754554748535, + "learning_rate": 5e-05, + "loss": 2.4579, + "num_input_tokens_seen": 19280924, + "step": 291 + }, + { + "epoch": 0.02723826461365657, + "loss": 2.3687572479248047, + "loss_ce": 0.0054758149199187756, + "loss_iou": 1.0, + "loss_num": 0.072265625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 19280924, + "step": 291 + }, + { + "epoch": 0.0273318668975523, + "grad_norm": 7.2403483390808105, + "learning_rate": 5e-05, + "loss": 2.2344, + "num_input_tokens_seen": 19346720, + "step": 292 + }, + { + "epoch": 0.0273318668975523, + "loss": 2.137115478515625, + "loss_ce": 0.007232709787786007, + "loss_iou": 0.91015625, + "loss_num": 0.061767578125, + "loss_xval": 2.125, + "num_input_tokens_seen": 19346720, + "step": 292 + }, + { + "epoch": 0.027425469181448028, + "grad_norm": 8.314435005187988, + "learning_rate": 5e-05, + "loss": 2.0899, + "num_input_tokens_seen": 19412088, + "step": 293 + }, + { + "epoch": 0.027425469181448028, + "loss": 1.943402886390686, + "loss_ce": 0.0049263592809438705, + "loss_iou": 0.82421875, + "loss_num": 0.0576171875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 19412088, + "step": 293 + }, + { + "epoch": 0.027519071465343756, + "grad_norm": 9.556326866149902, + "learning_rate": 5e-05, + "loss": 2.2337, + "num_input_tokens_seen": 19479208, + "step": 294 + }, + { + "epoch": 0.027519071465343756, + "loss": 2.3005897998809814, + "loss_ce": 0.0037147465627640486, + "loss_iou": 0.98046875, + "loss_num": 0.06640625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 19479208, + "step": 294 + }, + { + "epoch": 0.02761267374923948, + "grad_norm": 13.136784553527832, + "learning_rate": 5e-05, + "loss": 2.1323, + "num_input_tokens_seen": 19544596, + "step": 295 + }, + { + "epoch": 0.02761267374923948, + "loss": 2.2036406993865967, + "loss_ce": 0.010281315073370934, + "loss_iou": 0.95703125, + "loss_num": 0.05615234375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 19544596, + "step": 295 + }, + { + "epoch": 0.02770627603313521, + "grad_norm": 5.934866428375244, + "learning_rate": 5e-05, + "loss": 2.0263, + "num_input_tokens_seen": 19610172, + "step": 296 + }, + { + "epoch": 0.02770627603313521, + "loss": 2.067943572998047, + "loss_ce": 0.006664199288934469, + "loss_iou": 0.859375, + "loss_num": 0.068359375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 19610172, + "step": 296 + }, + { + "epoch": 0.027799878317030936, + "grad_norm": 22.314382553100586, + "learning_rate": 5e-05, + "loss": 1.9041, + "num_input_tokens_seen": 19675376, + "step": 297 + }, + { + "epoch": 0.027799878317030936, + "loss": 1.9297223091125488, + "loss_ce": 0.0033306607510894537, + "loss_iou": 0.81640625, + "loss_num": 0.05859375, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 19675376, + "step": 297 + }, + { + "epoch": 0.027893480600926664, + "grad_norm": 13.681904792785645, + "learning_rate": 5e-05, + "loss": 2.1424, + "num_input_tokens_seen": 19742156, + "step": 298 + }, + { + "epoch": 0.027893480600926664, + "loss": 2.1528525352478027, + "loss_ce": 0.007344689220190048, + "loss_iou": 0.9375, + "loss_num": 0.054931640625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 19742156, + "step": 298 + }, + { + "epoch": 0.02798708288482239, + "grad_norm": 14.090749740600586, + "learning_rate": 5e-05, + "loss": 2.1478, + "num_input_tokens_seen": 19807300, + "step": 299 + }, + { + "epoch": 0.02798708288482239, + "loss": 2.121377944946289, + "loss_ce": 0.0027258479967713356, + "loss_iou": 0.89453125, + "loss_num": 0.06689453125, + "loss_xval": 2.125, + "num_input_tokens_seen": 19807300, + "step": 299 + }, + { + "epoch": 0.028080685168718117, + "grad_norm": 12.29017448425293, + "learning_rate": 5e-05, + "loss": 2.1001, + "num_input_tokens_seen": 19872908, + "step": 300 + }, + { + "epoch": 0.028080685168718117, + "loss": 2.1516480445861816, + "loss_ce": 0.00321069173514843, + "loss_iou": 0.93359375, + "loss_num": 0.05712890625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 19872908, + "step": 300 + }, + { + "epoch": 0.028174287452613845, + "grad_norm": 11.03828239440918, + "learning_rate": 5e-05, + "loss": 2.3709, + "num_input_tokens_seen": 19939296, + "step": 301 + }, + { + "epoch": 0.028174287452613845, + "loss": 2.3694803714752197, + "loss_ce": 0.007175634615123272, + "loss_iou": 0.98046875, + "loss_num": 0.0791015625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 19939296, + "step": 301 + }, + { + "epoch": 0.02826788973650957, + "grad_norm": 18.373350143432617, + "learning_rate": 5e-05, + "loss": 2.1843, + "num_input_tokens_seen": 20005232, + "step": 302 + }, + { + "epoch": 0.02826788973650957, + "loss": 2.271916389465332, + "loss_ce": 0.005314883776009083, + "loss_iou": 0.98046875, + "loss_num": 0.061279296875, + "loss_xval": 2.265625, + "num_input_tokens_seen": 20005232, + "step": 302 + }, + { + "epoch": 0.028361492020405298, + "grad_norm": 10.74294376373291, + "learning_rate": 5e-05, + "loss": 2.1218, + "num_input_tokens_seen": 20071088, + "step": 303 + }, + { + "epoch": 0.028361492020405298, + "loss": 2.2172842025756836, + "loss_ce": 0.007323308382183313, + "loss_iou": 0.95703125, + "loss_num": 0.05908203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 20071088, + "step": 303 + }, + { + "epoch": 0.028455094304301026, + "grad_norm": 15.19969367980957, + "learning_rate": 5e-05, + "loss": 1.9467, + "num_input_tokens_seen": 20137280, + "step": 304 + }, + { + "epoch": 0.028455094304301026, + "loss": 2.124724864959717, + "loss_ce": 0.007537480443716049, + "loss_iou": 0.9140625, + "loss_num": 0.058349609375, + "loss_xval": 2.125, + "num_input_tokens_seen": 20137280, + "step": 304 + }, + { + "epoch": 0.028548696588196754, + "grad_norm": 15.501870155334473, + "learning_rate": 5e-05, + "loss": 2.361, + "num_input_tokens_seen": 20203344, + "step": 305 + }, + { + "epoch": 0.028548696588196754, + "loss": 2.3150863647460938, + "loss_ce": 0.010398984886705875, + "loss_iou": 0.984375, + "loss_num": 0.06689453125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 20203344, + "step": 305 + }, + { + "epoch": 0.028642298872092478, + "grad_norm": 9.604381561279297, + "learning_rate": 5e-05, + "loss": 1.8154, + "num_input_tokens_seen": 20269392, + "step": 306 + }, + { + "epoch": 0.028642298872092478, + "loss": 1.6880543231964111, + "loss_ce": 0.003728260286152363, + "loss_iou": 0.7265625, + "loss_num": 0.045654296875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 20269392, + "step": 306 + }, + { + "epoch": 0.028735901155988206, + "grad_norm": 9.203229904174805, + "learning_rate": 5e-05, + "loss": 2.072, + "num_input_tokens_seen": 20336624, + "step": 307 + }, + { + "epoch": 0.028735901155988206, + "loss": 2.2043728828430176, + "loss_ce": 0.003201248124241829, + "loss_iou": 0.9609375, + "loss_num": 0.05517578125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 20336624, + "step": 307 + }, + { + "epoch": 0.028829503439883934, + "grad_norm": 10.39322566986084, + "learning_rate": 5e-05, + "loss": 2.0674, + "num_input_tokens_seen": 20402832, + "step": 308 + }, + { + "epoch": 0.028829503439883934, + "loss": 1.8517069816589355, + "loss_ce": 0.006492123007774353, + "loss_iou": 0.7890625, + "loss_num": 0.05322265625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 20402832, + "step": 308 + }, + { + "epoch": 0.02892310572377966, + "grad_norm": 12.747912406921387, + "learning_rate": 5e-05, + "loss": 1.9345, + "num_input_tokens_seen": 20468660, + "step": 309 + }, + { + "epoch": 0.02892310572377966, + "loss": 1.903911828994751, + "loss_ce": 0.0054743001237511635, + "loss_iou": 0.8359375, + "loss_num": 0.04541015625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 20468660, + "step": 309 + }, + { + "epoch": 0.029016708007675387, + "grad_norm": 14.974900245666504, + "learning_rate": 5e-05, + "loss": 2.5532, + "num_input_tokens_seen": 20535584, + "step": 310 + }, + { + "epoch": 0.029016708007675387, + "loss": 2.6041457653045654, + "loss_ce": 0.0035598722752183676, + "loss_iou": 1.109375, + "loss_num": 0.07470703125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 20535584, + "step": 310 + }, + { + "epoch": 0.029110310291571115, + "grad_norm": 6.866947650909424, + "learning_rate": 5e-05, + "loss": 2.3236, + "num_input_tokens_seen": 20602124, + "step": 311 + }, + { + "epoch": 0.029110310291571115, + "loss": 2.388014316558838, + "loss_ce": 0.007154986262321472, + "loss_iou": 1.0, + "loss_num": 0.0751953125, + "loss_xval": 2.375, + "num_input_tokens_seen": 20602124, + "step": 311 + }, + { + "epoch": 0.029203912575466843, + "grad_norm": 16.738218307495117, + "learning_rate": 5e-05, + "loss": 2.1105, + "num_input_tokens_seen": 20668156, + "step": 312 + }, + { + "epoch": 0.029203912575466843, + "loss": 2.0506856441497803, + "loss_ce": 0.0062519097700715065, + "loss_iou": 0.859375, + "loss_num": 0.0654296875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 20668156, + "step": 312 + }, + { + "epoch": 0.029297514859362567, + "grad_norm": 41.55394744873047, + "learning_rate": 5e-05, + "loss": 2.0664, + "num_input_tokens_seen": 20734828, + "step": 313 + }, + { + "epoch": 0.029297514859362567, + "loss": 1.9767587184906006, + "loss_ce": 0.005078917369246483, + "loss_iou": 0.85546875, + "loss_num": 0.05322265625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 20734828, + "step": 313 + }, + { + "epoch": 0.029391117143258295, + "grad_norm": 26.242605209350586, + "learning_rate": 5e-05, + "loss": 2.488, + "num_input_tokens_seen": 20801332, + "step": 314 + }, + { + "epoch": 0.029391117143258295, + "loss": 2.4511401653289795, + "loss_ce": 0.0038744183257222176, + "loss_iou": 1.09375, + "loss_num": 0.053466796875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 20801332, + "step": 314 + }, + { + "epoch": 0.029484719427154023, + "grad_norm": 7.956735610961914, + "learning_rate": 5e-05, + "loss": 2.3767, + "num_input_tokens_seen": 20867284, + "step": 315 + }, + { + "epoch": 0.029484719427154023, + "loss": 2.5748186111450195, + "loss_ce": 0.0064592985436320305, + "loss_iou": 1.046875, + "loss_num": 0.0927734375, + "loss_xval": 2.5625, + "num_input_tokens_seen": 20867284, + "step": 315 + }, + { + "epoch": 0.029578321711049748, + "grad_norm": 5.501648426055908, + "learning_rate": 5e-05, + "loss": 2.4083, + "num_input_tokens_seen": 20932536, + "step": 316 + }, + { + "epoch": 0.029578321711049748, + "loss": 2.3189499378204346, + "loss_ce": 0.002543692011386156, + "loss_iou": 0.953125, + "loss_num": 0.08203125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 20932536, + "step": 316 + }, + { + "epoch": 0.029671923994945476, + "grad_norm": 5.204927444458008, + "learning_rate": 5e-05, + "loss": 2.2079, + "num_input_tokens_seen": 20997788, + "step": 317 + }, + { + "epoch": 0.029671923994945476, + "loss": 2.251739263534546, + "loss_ce": 0.0017392246518284082, + "loss_iou": 0.90625, + "loss_num": 0.0869140625, + "loss_xval": 2.25, + "num_input_tokens_seen": 20997788, + "step": 317 + }, + { + "epoch": 0.029765526278841204, + "grad_norm": 13.988932609558105, + "learning_rate": 5e-05, + "loss": 2.1701, + "num_input_tokens_seen": 21063624, + "step": 318 + }, + { + "epoch": 0.029765526278841204, + "loss": 2.1111536026000977, + "loss_ce": 0.007637982256710529, + "loss_iou": 0.91015625, + "loss_num": 0.057861328125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 21063624, + "step": 318 + }, + { + "epoch": 0.029859128562736932, + "grad_norm": 41.570011138916016, + "learning_rate": 5e-05, + "loss": 2.1344, + "num_input_tokens_seen": 21129904, + "step": 319 + }, + { + "epoch": 0.029859128562736932, + "loss": 2.3227148056030273, + "loss_ce": 0.005332026164978743, + "loss_iou": 1.0078125, + "loss_num": 0.060546875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 21129904, + "step": 319 + }, + { + "epoch": 0.029952730846632657, + "grad_norm": 20.925189971923828, + "learning_rate": 5e-05, + "loss": 2.1157, + "num_input_tokens_seen": 21195652, + "step": 320 + }, + { + "epoch": 0.029952730846632657, + "loss": 2.256359815597534, + "loss_ce": 0.006359857507050037, + "loss_iou": 0.9375, + "loss_num": 0.07470703125, + "loss_xval": 2.25, + "num_input_tokens_seen": 21195652, + "step": 320 + }, + { + "epoch": 0.030046333130528385, + "grad_norm": 14.465229034423828, + "learning_rate": 5e-05, + "loss": 2.0414, + "num_input_tokens_seen": 21261820, + "step": 321 + }, + { + "epoch": 0.030046333130528385, + "loss": 2.1699764728546143, + "loss_ce": 0.002129678148776293, + "loss_iou": 0.89453125, + "loss_num": 0.0751953125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 21261820, + "step": 321 + }, + { + "epoch": 0.030139935414424113, + "grad_norm": 16.589031219482422, + "learning_rate": 5e-05, + "loss": 2.0181, + "num_input_tokens_seen": 21326652, + "step": 322 + }, + { + "epoch": 0.030139935414424113, + "loss": 1.9930992126464844, + "loss_ce": 0.004818132147192955, + "loss_iou": 0.8359375, + "loss_num": 0.06396484375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 21326652, + "step": 322 + }, + { + "epoch": 0.030233537698319837, + "grad_norm": 8.369630813598633, + "learning_rate": 5e-05, + "loss": 2.4247, + "num_input_tokens_seen": 21392536, + "step": 323 + }, + { + "epoch": 0.030233537698319837, + "loss": 2.5111265182495117, + "loss_ce": 0.007220389321446419, + "loss_iou": 1.0390625, + "loss_num": 0.0859375, + "loss_xval": 2.5, + "num_input_tokens_seen": 21392536, + "step": 323 + }, + { + "epoch": 0.030327139982215565, + "grad_norm": 13.11899471282959, + "learning_rate": 5e-05, + "loss": 1.9538, + "num_input_tokens_seen": 21459492, + "step": 324 + }, + { + "epoch": 0.030327139982215565, + "loss": 1.806440830230713, + "loss_ce": 0.006636073812842369, + "loss_iou": 0.77734375, + "loss_num": 0.049560546875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 21459492, + "step": 324 + }, + { + "epoch": 0.030420742266111293, + "grad_norm": 16.149150848388672, + "learning_rate": 5e-05, + "loss": 1.9541, + "num_input_tokens_seen": 21525064, + "step": 325 + }, + { + "epoch": 0.030420742266111293, + "loss": 2.129866123199463, + "loss_ce": 0.006819196045398712, + "loss_iou": 0.9375, + "loss_num": 0.049560546875, + "loss_xval": 2.125, + "num_input_tokens_seen": 21525064, + "step": 325 + }, + { + "epoch": 0.03051434455000702, + "grad_norm": 14.965729713439941, + "learning_rate": 5e-05, + "loss": 2.3008, + "num_input_tokens_seen": 21590260, + "step": 326 + }, + { + "epoch": 0.03051434455000702, + "loss": 2.1891860961914062, + "loss_ce": 0.0026626817416399717, + "loss_iou": 0.9609375, + "loss_num": 0.05322265625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 21590260, + "step": 326 + }, + { + "epoch": 0.030607946833902746, + "grad_norm": 135.52027893066406, + "learning_rate": 5e-05, + "loss": 2.1703, + "num_input_tokens_seen": 21657200, + "step": 327 + }, + { + "epoch": 0.030607946833902746, + "loss": 2.159827947616577, + "loss_ce": 0.0026013727765530348, + "loss_iou": 0.921875, + "loss_num": 0.0625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 21657200, + "step": 327 + }, + { + "epoch": 0.030701549117798474, + "grad_norm": 10.350919723510742, + "learning_rate": 5e-05, + "loss": 2.3369, + "num_input_tokens_seen": 21724264, + "step": 328 + }, + { + "epoch": 0.030701549117798474, + "loss": 2.3039512634277344, + "loss_ce": 0.0031697875820100307, + "loss_iou": 0.9609375, + "loss_num": 0.0751953125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 21724264, + "step": 328 + }, + { + "epoch": 0.030795151401694202, + "grad_norm": 16.0537052154541, + "learning_rate": 5e-05, + "loss": 1.9921, + "num_input_tokens_seen": 21790456, + "step": 329 + }, + { + "epoch": 0.030795151401694202, + "loss": 2.126025438308716, + "loss_ce": 0.0020019442308694124, + "loss_iou": 0.890625, + "loss_num": 0.06884765625, + "loss_xval": 2.125, + "num_input_tokens_seen": 21790456, + "step": 329 + }, + { + "epoch": 0.03088875368558993, + "grad_norm": 17.134672164916992, + "learning_rate": 5e-05, + "loss": 2.2526, + "num_input_tokens_seen": 21856772, + "step": 330 + }, + { + "epoch": 0.03088875368558993, + "loss": 2.4109668731689453, + "loss_ce": 0.004716916009783745, + "loss_iou": 1.0234375, + "loss_num": 0.072265625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 21856772, + "step": 330 + }, + { + "epoch": 0.030982355969485655, + "grad_norm": 19.04180335998535, + "learning_rate": 5e-05, + "loss": 2.216, + "num_input_tokens_seen": 21922996, + "step": 331 + }, + { + "epoch": 0.030982355969485655, + "loss": 2.4084181785583496, + "loss_ce": 0.0021683042868971825, + "loss_iou": 1.046875, + "loss_num": 0.0625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 21922996, + "step": 331 + }, + { + "epoch": 0.031075958253381383, + "grad_norm": 8.35339641571045, + "learning_rate": 5e-05, + "loss": 2.229, + "num_input_tokens_seen": 21989376, + "step": 332 + }, + { + "epoch": 0.031075958253381383, + "loss": 2.103503465652466, + "loss_ce": 0.005847356282174587, + "loss_iou": 0.86328125, + "loss_num": 0.07421875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 21989376, + "step": 332 + }, + { + "epoch": 0.03116956053727711, + "grad_norm": 11.053943634033203, + "learning_rate": 5e-05, + "loss": 1.9455, + "num_input_tokens_seen": 22054748, + "step": 333 + }, + { + "epoch": 0.03116956053727711, + "loss": 1.8045395612716675, + "loss_ce": 0.003758274018764496, + "loss_iou": 0.76953125, + "loss_num": 0.05224609375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 22054748, + "step": 333 + }, + { + "epoch": 0.031263162821172835, + "grad_norm": 12.617913246154785, + "learning_rate": 5e-05, + "loss": 2.0766, + "num_input_tokens_seen": 22121664, + "step": 334 + }, + { + "epoch": 0.031263162821172835, + "loss": 1.9510293006896973, + "loss_ce": 0.004740268923342228, + "loss_iou": 0.85546875, + "loss_num": 0.04736328125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 22121664, + "step": 334 + }, + { + "epoch": 0.03135676510506857, + "grad_norm": 11.487972259521484, + "learning_rate": 5e-05, + "loss": 1.9995, + "num_input_tokens_seen": 22187308, + "step": 335 + }, + { + "epoch": 0.03135676510506857, + "loss": 1.9693429470062256, + "loss_ce": 0.00547579862177372, + "loss_iou": 0.8203125, + "loss_num": 0.06494140625, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 22187308, + "step": 335 + }, + { + "epoch": 0.03145036738896429, + "grad_norm": 25.225971221923828, + "learning_rate": 5e-05, + "loss": 2.189, + "num_input_tokens_seen": 22254452, + "step": 336 + }, + { + "epoch": 0.03145036738896429, + "loss": 2.1077098846435547, + "loss_ce": 0.004194378387182951, + "loss_iou": 0.9296875, + "loss_num": 0.048583984375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 22254452, + "step": 336 + }, + { + "epoch": 0.031543969672860016, + "grad_norm": 8.82065486907959, + "learning_rate": 5e-05, + "loss": 2.6442, + "num_input_tokens_seen": 22320172, + "step": 337 + }, + { + "epoch": 0.031543969672860016, + "loss": 2.5134079456329346, + "loss_ce": 0.007548556663095951, + "loss_iou": 1.046875, + "loss_num": 0.0830078125, + "loss_xval": 2.5, + "num_input_tokens_seen": 22320172, + "step": 337 + }, + { + "epoch": 0.03163757195675575, + "grad_norm": 9.14218807220459, + "learning_rate": 5e-05, + "loss": 2.1566, + "num_input_tokens_seen": 22385548, + "step": 338 + }, + { + "epoch": 0.03163757195675575, + "loss": 1.9683382511138916, + "loss_ce": 0.004959251265972853, + "loss_iou": 0.7734375, + "loss_num": 0.0830078125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 22385548, + "step": 338 + }, + { + "epoch": 0.03173117424065147, + "grad_norm": 7.104374408721924, + "learning_rate": 5e-05, + "loss": 2.0937, + "num_input_tokens_seen": 22451460, + "step": 339 + }, + { + "epoch": 0.03173117424065147, + "loss": 2.0578629970550537, + "loss_ce": 0.0021988661028444767, + "loss_iou": 0.875, + "loss_num": 0.060546875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 22451460, + "step": 339 + }, + { + "epoch": 0.031824776524547196, + "grad_norm": 7.342142105102539, + "learning_rate": 5e-05, + "loss": 1.9958, + "num_input_tokens_seen": 22519000, + "step": 340 + }, + { + "epoch": 0.031824776524547196, + "loss": 2.2122642993927, + "loss_ce": 0.007186007685959339, + "loss_iou": 0.92578125, + "loss_num": 0.06982421875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 22519000, + "step": 340 + }, + { + "epoch": 0.03191837880844293, + "grad_norm": 15.019513130187988, + "learning_rate": 5e-05, + "loss": 2.0717, + "num_input_tokens_seen": 22584336, + "step": 341 + }, + { + "epoch": 0.03191837880844293, + "loss": 2.1923344135284424, + "loss_ce": 0.006787460297346115, + "loss_iou": 0.8984375, + "loss_num": 0.078125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 22584336, + "step": 341 + }, + { + "epoch": 0.03201198109233865, + "grad_norm": 13.888160705566406, + "learning_rate": 5e-05, + "loss": 2.2871, + "num_input_tokens_seen": 22650512, + "step": 342 + }, + { + "epoch": 0.03201198109233865, + "loss": 2.2013368606567383, + "loss_ce": 0.006024563685059547, + "loss_iou": 0.94921875, + "loss_num": 0.06005859375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 22650512, + "step": 342 + }, + { + "epoch": 0.03210558337623438, + "grad_norm": 51.5828971862793, + "learning_rate": 5e-05, + "loss": 2.2569, + "num_input_tokens_seen": 22717600, + "step": 343 + }, + { + "epoch": 0.03210558337623438, + "loss": 2.141033411026001, + "loss_ce": 0.007244350388646126, + "loss_iou": 0.921875, + "loss_num": 0.0576171875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 22717600, + "step": 343 + }, + { + "epoch": 0.03219918566013011, + "grad_norm": 19.39960479736328, + "learning_rate": 5e-05, + "loss": 2.0903, + "num_input_tokens_seen": 22784816, + "step": 344 + }, + { + "epoch": 0.03219918566013011, + "loss": 2.307943820953369, + "loss_ce": 0.0071627311408519745, + "loss_iou": 0.9609375, + "loss_num": 0.07470703125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 22784816, + "step": 344 + }, + { + "epoch": 0.03229278794402583, + "grad_norm": 8.64476203918457, + "learning_rate": 5e-05, + "loss": 1.9282, + "num_input_tokens_seen": 22850788, + "step": 345 + }, + { + "epoch": 0.03229278794402583, + "loss": 1.9108871221542358, + "loss_ce": 0.009031591936945915, + "loss_iou": 0.828125, + "loss_num": 0.050048828125, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 22850788, + "step": 345 + }, + { + "epoch": 0.032386390227921565, + "grad_norm": 13.204330444335938, + "learning_rate": 5e-05, + "loss": 1.729, + "num_input_tokens_seen": 22918332, + "step": 346 + }, + { + "epoch": 0.032386390227921565, + "loss": 1.6758954524993896, + "loss_ce": 0.006461820099502802, + "loss_iou": 0.70703125, + "loss_num": 0.05126953125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 22918332, + "step": 346 + }, + { + "epoch": 0.03247999251181729, + "grad_norm": 34.31864929199219, + "learning_rate": 5e-05, + "loss": 2.4913, + "num_input_tokens_seen": 22984840, + "step": 347 + }, + { + "epoch": 0.03247999251181729, + "loss": 2.3904194831848145, + "loss_ce": 0.003700823523104191, + "loss_iou": 1.0703125, + "loss_num": 0.049560546875, + "loss_xval": 2.390625, + "num_input_tokens_seen": 22984840, + "step": 347 + }, + { + "epoch": 0.032573594795713014, + "grad_norm": 7.036735534667969, + "learning_rate": 5e-05, + "loss": 2.7401, + "num_input_tokens_seen": 23051540, + "step": 348 + }, + { + "epoch": 0.032573594795713014, + "loss": 2.7596395015716553, + "loss_ce": 0.007686318829655647, + "loss_iou": 1.125, + "loss_num": 0.1005859375, + "loss_xval": 2.75, + "num_input_tokens_seen": 23051540, + "step": 348 + }, + { + "epoch": 0.032667197079608745, + "grad_norm": 6.597587585449219, + "learning_rate": 5e-05, + "loss": 2.674, + "num_input_tokens_seen": 23117384, + "step": 349 + }, + { + "epoch": 0.032667197079608745, + "loss": 2.6734397411346436, + "loss_ce": 0.007424092385917902, + "loss_iou": 1.0625, + "loss_num": 0.10986328125, + "loss_xval": 2.671875, + "num_input_tokens_seen": 23117384, + "step": 349 + }, + { + "epoch": 0.03276079936350447, + "grad_norm": 9.384273529052734, + "learning_rate": 5e-05, + "loss": 2.3646, + "num_input_tokens_seen": 23183040, + "step": 350 + }, + { + "epoch": 0.03276079936350447, + "loss": 2.3336098194122314, + "loss_ce": 0.005484943278133869, + "loss_iou": 0.953125, + "loss_num": 0.083984375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 23183040, + "step": 350 + }, + { + "epoch": 0.032854401647400194, + "grad_norm": 25.62833023071289, + "learning_rate": 5e-05, + "loss": 2.3526, + "num_input_tokens_seen": 23249424, + "step": 351 + }, + { + "epoch": 0.032854401647400194, + "loss": 2.3824148178100586, + "loss_ce": 0.003508599940687418, + "loss_iou": 0.96484375, + "loss_num": 0.09033203125, + "loss_xval": 2.375, + "num_input_tokens_seen": 23249424, + "step": 351 + }, + { + "epoch": 0.032948003931295926, + "grad_norm": 11.983054161071777, + "learning_rate": 5e-05, + "loss": 2.177, + "num_input_tokens_seen": 23315636, + "step": 352 + }, + { + "epoch": 0.032948003931295926, + "loss": 2.2440249919891357, + "loss_ce": 0.005743754096329212, + "loss_iou": 0.953125, + "loss_num": 0.06689453125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 23315636, + "step": 352 + }, + { + "epoch": 0.03304160621519165, + "grad_norm": 218.51168823242188, + "learning_rate": 5e-05, + "loss": 1.9974, + "num_input_tokens_seen": 23382552, + "step": 353 + }, + { + "epoch": 0.03304160621519165, + "loss": 1.7904752492904663, + "loss_ce": 0.0036100444849580526, + "loss_iou": 0.765625, + "loss_num": 0.050537109375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 23382552, + "step": 353 + }, + { + "epoch": 0.033135208499087375, + "grad_norm": 10.059981346130371, + "learning_rate": 5e-05, + "loss": 2.1556, + "num_input_tokens_seen": 23447920, + "step": 354 + }, + { + "epoch": 0.033135208499087375, + "loss": 1.9498950242996216, + "loss_ce": 0.0026294696144759655, + "loss_iou": 0.84765625, + "loss_num": 0.05078125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 23447920, + "step": 354 + }, + { + "epoch": 0.033228810782983106, + "grad_norm": 23.80320167541504, + "learning_rate": 5e-05, + "loss": 2.1835, + "num_input_tokens_seen": 23514608, + "step": 355 + }, + { + "epoch": 0.033228810782983106, + "loss": 2.131493091583252, + "loss_ce": 0.013817189261317253, + "loss_iou": 0.86328125, + "loss_num": 0.07861328125, + "loss_xval": 2.125, + "num_input_tokens_seen": 23514608, + "step": 355 + }, + { + "epoch": 0.03332241306687883, + "grad_norm": 12.59532356262207, + "learning_rate": 5e-05, + "loss": 2.2421, + "num_input_tokens_seen": 23581888, + "step": 356 + }, + { + "epoch": 0.03332241306687883, + "loss": 2.2068097591400146, + "loss_ce": 0.0036847481969743967, + "loss_iou": 0.9375, + "loss_num": 0.0654296875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 23581888, + "step": 356 + }, + { + "epoch": 0.033416015350774556, + "grad_norm": 45.76629638671875, + "learning_rate": 5e-05, + "loss": 2.0625, + "num_input_tokens_seen": 23648560, + "step": 357 + }, + { + "epoch": 0.033416015350774556, + "loss": 2.1402082443237305, + "loss_ce": 0.0044661033898591995, + "loss_iou": 0.91796875, + "loss_num": 0.059326171875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 23648560, + "step": 357 + }, + { + "epoch": 0.03350961763467029, + "grad_norm": 12.574625968933105, + "learning_rate": 5e-05, + "loss": 2.0878, + "num_input_tokens_seen": 23714048, + "step": 358 + }, + { + "epoch": 0.03350961763467029, + "loss": 2.0568721294403076, + "loss_ce": 0.0031611050944775343, + "loss_iou": 0.890625, + "loss_num": 0.05419921875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 23714048, + "step": 358 + }, + { + "epoch": 0.03360321991856601, + "grad_norm": 29.129892349243164, + "learning_rate": 5e-05, + "loss": 2.2022, + "num_input_tokens_seen": 23780536, + "step": 359 + }, + { + "epoch": 0.03360321991856601, + "loss": 2.2413330078125, + "loss_ce": 0.006957824341952801, + "loss_iou": 1.0078125, + "loss_num": 0.0439453125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 23780536, + "step": 359 + }, + { + "epoch": 0.03369682220246174, + "grad_norm": 10.787083625793457, + "learning_rate": 5e-05, + "loss": 2.3958, + "num_input_tokens_seen": 23846064, + "step": 360 + }, + { + "epoch": 0.03369682220246174, + "loss": 2.3584518432617188, + "loss_ce": 0.004936321172863245, + "loss_iou": 0.9609375, + "loss_num": 0.08544921875, + "loss_xval": 2.359375, + "num_input_tokens_seen": 23846064, + "step": 360 + }, + { + "epoch": 0.03379042448635747, + "grad_norm": 26.4855899810791, + "learning_rate": 5e-05, + "loss": 2.231, + "num_input_tokens_seen": 23912776, + "step": 361 + }, + { + "epoch": 0.03379042448635747, + "loss": 2.137748956680298, + "loss_ce": 0.004936504643410444, + "loss_iou": 0.90234375, + "loss_num": 0.0654296875, + "loss_xval": 2.125, + "num_input_tokens_seen": 23912776, + "step": 361 + }, + { + "epoch": 0.03388402677025319, + "grad_norm": 10.368152618408203, + "learning_rate": 5e-05, + "loss": 1.9874, + "num_input_tokens_seen": 23978032, + "step": 362 + }, + { + "epoch": 0.03388402677025319, + "loss": 1.9442696571350098, + "loss_ce": 0.002741375006735325, + "loss_iou": 0.8046875, + "loss_num": 0.06640625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 23978032, + "step": 362 + }, + { + "epoch": 0.033977629054148924, + "grad_norm": 21.596967697143555, + "learning_rate": 5e-05, + "loss": 2.0939, + "num_input_tokens_seen": 24043472, + "step": 363 + }, + { + "epoch": 0.033977629054148924, + "loss": 1.9262769222259521, + "loss_ce": 0.001472302945330739, + "loss_iou": 0.8359375, + "loss_num": 0.05078125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 24043472, + "step": 363 + }, + { + "epoch": 0.03407123133804465, + "grad_norm": 10.161048889160156, + "learning_rate": 5e-05, + "loss": 2.0024, + "num_input_tokens_seen": 24109644, + "step": 364 + }, + { + "epoch": 0.03407123133804465, + "loss": 1.8891668319702148, + "loss_ce": 0.0029362887144088745, + "loss_iou": 0.8125, + "loss_num": 0.05224609375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 24109644, + "step": 364 + }, + { + "epoch": 0.03416483362194037, + "grad_norm": 27.79102325439453, + "learning_rate": 5e-05, + "loss": 2.0082, + "num_input_tokens_seen": 24176184, + "step": 365 + }, + { + "epoch": 0.03416483362194037, + "loss": 2.2154271602630615, + "loss_ce": 0.0035131131298840046, + "loss_iou": 0.984375, + "loss_num": 0.049072265625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 24176184, + "step": 365 + }, + { + "epoch": 0.034258435905836104, + "grad_norm": 10.18643569946289, + "learning_rate": 5e-05, + "loss": 2.6185, + "num_input_tokens_seen": 24241780, + "step": 366 + }, + { + "epoch": 0.034258435905836104, + "loss": 2.62292218208313, + "loss_ce": 0.007687842007726431, + "loss_iou": 1.078125, + "loss_num": 0.09033203125, + "loss_xval": 2.609375, + "num_input_tokens_seen": 24241780, + "step": 366 + }, + { + "epoch": 0.03435203818973183, + "grad_norm": 9.291022300720215, + "learning_rate": 5e-05, + "loss": 2.2128, + "num_input_tokens_seen": 24308588, + "step": 367 + }, + { + "epoch": 0.03435203818973183, + "loss": 2.3625073432922363, + "loss_ce": 0.005085327662527561, + "loss_iou": 0.97265625, + "loss_num": 0.08203125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 24308588, + "step": 367 + }, + { + "epoch": 0.03444564047362755, + "grad_norm": 12.33276081085205, + "learning_rate": 5e-05, + "loss": 2.1762, + "num_input_tokens_seen": 24374416, + "step": 368 + }, + { + "epoch": 0.03444564047362755, + "loss": 2.2196903228759766, + "loss_ce": 0.006799739319831133, + "loss_iou": 0.921875, + "loss_num": 0.0732421875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 24374416, + "step": 368 + }, + { + "epoch": 0.034539242757523285, + "grad_norm": 12.163825988769531, + "learning_rate": 5e-05, + "loss": 2.0329, + "num_input_tokens_seen": 24441288, + "step": 369 + }, + { + "epoch": 0.034539242757523285, + "loss": 2.129265069961548, + "loss_ce": 0.005241577047854662, + "loss_iou": 0.8828125, + "loss_num": 0.07275390625, + "loss_xval": 2.125, + "num_input_tokens_seen": 24441288, + "step": 369 + }, + { + "epoch": 0.03463284504141901, + "grad_norm": 19.660593032836914, + "learning_rate": 5e-05, + "loss": 1.9867, + "num_input_tokens_seen": 24505952, + "step": 370 + }, + { + "epoch": 0.03463284504141901, + "loss": 1.8676186800003052, + "loss_ce": 0.007633232045918703, + "loss_iou": 0.828125, + "loss_num": 0.040771484375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 24505952, + "step": 370 + }, + { + "epoch": 0.03472644732531474, + "grad_norm": 8.436841011047363, + "learning_rate": 5e-05, + "loss": 2.3642, + "num_input_tokens_seen": 24572172, + "step": 371 + }, + { + "epoch": 0.03472644732531474, + "loss": 2.509275436401367, + "loss_ce": 0.005369182676076889, + "loss_iou": 1.0703125, + "loss_num": 0.07373046875, + "loss_xval": 2.5, + "num_input_tokens_seen": 24572172, + "step": 371 + }, + { + "epoch": 0.034820049609210466, + "grad_norm": 17.772293090820312, + "learning_rate": 5e-05, + "loss": 2.1125, + "num_input_tokens_seen": 24639288, + "step": 372 + }, + { + "epoch": 0.034820049609210466, + "loss": 2.1627039909362793, + "loss_ce": 0.005477518774569035, + "loss_iou": 0.91796875, + "loss_num": 0.06396484375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 24639288, + "step": 372 + }, + { + "epoch": 0.03491365189310619, + "grad_norm": 12.384817123413086, + "learning_rate": 5e-05, + "loss": 2.0017, + "num_input_tokens_seen": 24706620, + "step": 373 + }, + { + "epoch": 0.03491365189310619, + "loss": 1.9600626230239868, + "loss_ce": 0.004984484985470772, + "loss_iou": 0.8515625, + "loss_num": 0.05126953125, + "loss_xval": 1.953125, + "num_input_tokens_seen": 24706620, + "step": 373 + }, + { + "epoch": 0.03500725417700192, + "grad_norm": 39.35420227050781, + "learning_rate": 5e-05, + "loss": 2.3049, + "num_input_tokens_seen": 24773152, + "step": 374 + }, + { + "epoch": 0.03500725417700192, + "loss": 2.4104323387145996, + "loss_ce": 0.008088554255664349, + "loss_iou": 1.015625, + "loss_num": 0.0732421875, + "loss_xval": 2.40625, + "num_input_tokens_seen": 24773152, + "step": 374 + }, + { + "epoch": 0.035100856460897646, + "grad_norm": 29.24478530883789, + "learning_rate": 5e-05, + "loss": 1.9513, + "num_input_tokens_seen": 24839316, + "step": 375 + }, + { + "epoch": 0.035100856460897646, + "loss": 2.052703380584717, + "loss_ce": 0.0038751561660319567, + "loss_iou": 0.890625, + "loss_num": 0.05322265625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 24839316, + "step": 375 + }, + { + "epoch": 0.03519445874479337, + "grad_norm": 10.423100471496582, + "learning_rate": 5e-05, + "loss": 1.9495, + "num_input_tokens_seen": 24905412, + "step": 376 + }, + { + "epoch": 0.03519445874479337, + "loss": 2.009066104888916, + "loss_ce": 0.0012535951100289822, + "loss_iou": 0.84375, + "loss_num": 0.0634765625, + "loss_xval": 2.0, + "num_input_tokens_seen": 24905412, + "step": 376 + }, + { + "epoch": 0.0352880610286891, + "grad_norm": 21.374744415283203, + "learning_rate": 5e-05, + "loss": 2.0672, + "num_input_tokens_seen": 24971560, + "step": 377 + }, + { + "epoch": 0.0352880610286891, + "loss": 2.2741591930389404, + "loss_ce": 0.002674894407391548, + "loss_iou": 1.0078125, + "loss_num": 0.049560546875, + "loss_xval": 2.265625, + "num_input_tokens_seen": 24971560, + "step": 377 + }, + { + "epoch": 0.03538166331258483, + "grad_norm": 7.7313361167907715, + "learning_rate": 5e-05, + "loss": 2.3296, + "num_input_tokens_seen": 25037460, + "step": 378 + }, + { + "epoch": 0.03538166331258483, + "loss": 2.5008437633514404, + "loss_ce": 0.0067031835205852985, + "loss_iou": 1.0078125, + "loss_num": 0.0947265625, + "loss_xval": 2.5, + "num_input_tokens_seen": 25037460, + "step": 378 + }, + { + "epoch": 0.03547526559648055, + "grad_norm": 65.93770599365234, + "learning_rate": 5e-05, + "loss": 2.2821, + "num_input_tokens_seen": 25103504, + "step": 379 + }, + { + "epoch": 0.03547526559648055, + "loss": 2.2385544776916504, + "loss_ce": 0.00417946046218276, + "loss_iou": 0.9453125, + "loss_num": 0.06787109375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 25103504, + "step": 379 + }, + { + "epoch": 0.03556886788037628, + "grad_norm": 12.77132797241211, + "learning_rate": 5e-05, + "loss": 1.9231, + "num_input_tokens_seen": 25169852, + "step": 380 + }, + { + "epoch": 0.03556886788037628, + "loss": 1.8846288919448853, + "loss_ce": 0.004746079444885254, + "loss_iou": 0.7890625, + "loss_num": 0.0595703125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 25169852, + "step": 380 + }, + { + "epoch": 0.03566247016427201, + "grad_norm": 22.883155822753906, + "learning_rate": 5e-05, + "loss": 2.1241, + "num_input_tokens_seen": 25236536, + "step": 381 + }, + { + "epoch": 0.03566247016427201, + "loss": 2.1614749431610107, + "loss_ce": 0.004248403944075108, + "loss_iou": 0.9296875, + "loss_num": 0.06005859375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 25236536, + "step": 381 + }, + { + "epoch": 0.03575607244816773, + "grad_norm": 19.95392608642578, + "learning_rate": 5e-05, + "loss": 2.5629, + "num_input_tokens_seen": 25302256, + "step": 382 + }, + { + "epoch": 0.03575607244816773, + "loss": 2.630160331726074, + "loss_ce": 0.0032072500325739384, + "loss_iou": 1.1328125, + "loss_num": 0.07177734375, + "loss_xval": 2.625, + "num_input_tokens_seen": 25302256, + "step": 382 + }, + { + "epoch": 0.03584967473206346, + "grad_norm": 9.674190521240234, + "learning_rate": 5e-05, + "loss": 2.3442, + "num_input_tokens_seen": 25366992, + "step": 383 + }, + { + "epoch": 0.03584967473206346, + "loss": 2.3065168857574463, + "loss_ce": 0.0018293661996722221, + "loss_iou": 0.9453125, + "loss_num": 0.08349609375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 25366992, + "step": 383 + }, + { + "epoch": 0.03594327701595919, + "grad_norm": 19.20514488220215, + "learning_rate": 5e-05, + "loss": 1.8755, + "num_input_tokens_seen": 25432540, + "step": 384 + }, + { + "epoch": 0.03594327701595919, + "loss": 1.8492238521575928, + "loss_ce": 0.003520740196108818, + "loss_iou": 0.796875, + "loss_num": 0.050048828125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 25432540, + "step": 384 + }, + { + "epoch": 0.03603687929985492, + "grad_norm": 20.697628021240234, + "learning_rate": 5e-05, + "loss": 1.9476, + "num_input_tokens_seen": 25498652, + "step": 385 + }, + { + "epoch": 0.03603687929985492, + "loss": 2.1511905193328857, + "loss_ce": 0.016424886882305145, + "loss_iou": 0.921875, + "loss_num": 0.0576171875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 25498652, + "step": 385 + }, + { + "epoch": 0.036130481583750644, + "grad_norm": 13.417241096496582, + "learning_rate": 5e-05, + "loss": 2.2559, + "num_input_tokens_seen": 25565424, + "step": 386 + }, + { + "epoch": 0.036130481583750644, + "loss": 2.297234535217285, + "loss_ce": 0.004265897441655397, + "loss_iou": 0.9765625, + "loss_num": 0.0673828125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 25565424, + "step": 386 + }, + { + "epoch": 0.03622408386764637, + "grad_norm": 15.320602416992188, + "learning_rate": 5e-05, + "loss": 1.7418, + "num_input_tokens_seen": 25632460, + "step": 387 + }, + { + "epoch": 0.03622408386764637, + "loss": 1.7443631887435913, + "loss_ce": 0.00412881001830101, + "loss_iou": 0.765625, + "loss_num": 0.041259765625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 25632460, + "step": 387 + }, + { + "epoch": 0.0363176861515421, + "grad_norm": 21.39435386657715, + "learning_rate": 5e-05, + "loss": 2.3779, + "num_input_tokens_seen": 25698924, + "step": 388 + }, + { + "epoch": 0.0363176861515421, + "loss": 2.4170782566070557, + "loss_ce": 0.004968872293829918, + "loss_iou": 1.078125, + "loss_num": 0.05224609375, + "loss_xval": 2.40625, + "num_input_tokens_seen": 25698924, + "step": 388 + }, + { + "epoch": 0.036411288435437825, + "grad_norm": 11.618865966796875, + "learning_rate": 5e-05, + "loss": 2.3603, + "num_input_tokens_seen": 25765596, + "step": 389 + }, + { + "epoch": 0.036411288435437825, + "loss": 2.6621227264404297, + "loss_ce": 0.003919581882655621, + "loss_iou": 1.0546875, + "loss_num": 0.109375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 25765596, + "step": 389 + }, + { + "epoch": 0.03650489071933355, + "grad_norm": 13.078133583068848, + "learning_rate": 5e-05, + "loss": 2.1917, + "num_input_tokens_seen": 25832992, + "step": 390 + }, + { + "epoch": 0.03650489071933355, + "loss": 2.1810710430145264, + "loss_ce": 0.006266321986913681, + "loss_iou": 0.92578125, + "loss_num": 0.06396484375, + "loss_xval": 2.171875, + "num_input_tokens_seen": 25832992, + "step": 390 + }, + { + "epoch": 0.03659849300322928, + "grad_norm": 11.813979148864746, + "learning_rate": 5e-05, + "loss": 1.9176, + "num_input_tokens_seen": 25898552, + "step": 391 + }, + { + "epoch": 0.03659849300322928, + "loss": 1.9757747650146484, + "loss_ce": 0.005071574356406927, + "loss_iou": 0.80078125, + "loss_num": 0.07421875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 25898552, + "step": 391 + }, + { + "epoch": 0.036692095287125005, + "grad_norm": 25.330154418945312, + "learning_rate": 5e-05, + "loss": 1.9453, + "num_input_tokens_seen": 25965580, + "step": 392 + }, + { + "epoch": 0.036692095287125005, + "loss": 2.0348823070526123, + "loss_ce": 0.006561874412000179, + "loss_iou": 0.90625, + "loss_num": 0.04248046875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 25965580, + "step": 392 + }, + { + "epoch": 0.03678569757102073, + "grad_norm": 9.008604049682617, + "learning_rate": 5e-05, + "loss": 2.5008, + "num_input_tokens_seen": 26031208, + "step": 393 + }, + { + "epoch": 0.03678569757102073, + "loss": 2.6614603996276855, + "loss_ce": 0.003257141914218664, + "loss_iou": 1.1015625, + "loss_num": 0.091796875, + "loss_xval": 2.65625, + "num_input_tokens_seen": 26031208, + "step": 393 + }, + { + "epoch": 0.03687929985491646, + "grad_norm": 14.616659164428711, + "learning_rate": 5e-05, + "loss": 2.2705, + "num_input_tokens_seen": 26096588, + "step": 394 + }, + { + "epoch": 0.03687929985491646, + "loss": 2.3706624507904053, + "loss_ce": 0.0015218132175505161, + "loss_iou": 1.0078125, + "loss_num": 0.0693359375, + "loss_xval": 2.375, + "num_input_tokens_seen": 26096588, + "step": 394 + }, + { + "epoch": 0.036972902138812186, + "grad_norm": 8.542869567871094, + "learning_rate": 5e-05, + "loss": 2.0632, + "num_input_tokens_seen": 26163324, + "step": 395 + }, + { + "epoch": 0.036972902138812186, + "loss": 2.0058720111846924, + "loss_ce": 0.001965888310223818, + "loss_iou": 0.875, + "loss_num": 0.049560546875, + "loss_xval": 2.0, + "num_input_tokens_seen": 26163324, + "step": 395 + }, + { + "epoch": 0.03706650442270792, + "grad_norm": 13.70307445526123, + "learning_rate": 5e-05, + "loss": 2.1156, + "num_input_tokens_seen": 26229608, + "step": 396 + }, + { + "epoch": 0.03706650442270792, + "loss": 2.2452762126922607, + "loss_ce": 0.006018448621034622, + "loss_iou": 0.9765625, + "loss_num": 0.057861328125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 26229608, + "step": 396 + }, + { + "epoch": 0.03716010670660364, + "grad_norm": 68.48944091796875, + "learning_rate": 5e-05, + "loss": 1.8305, + "num_input_tokens_seen": 26294672, + "step": 397 + }, + { + "epoch": 0.03716010670660364, + "loss": 1.8785256147384644, + "loss_ce": 0.0054787942208349705, + "loss_iou": 0.77734375, + "loss_num": 0.06396484375, + "loss_xval": 1.875, + "num_input_tokens_seen": 26294672, + "step": 397 + }, + { + "epoch": 0.037253708990499367, + "grad_norm": 15.26705551147461, + "learning_rate": 5e-05, + "loss": 2.0057, + "num_input_tokens_seen": 26361136, + "step": 398 + }, + { + "epoch": 0.037253708990499367, + "loss": 2.0479254722595215, + "loss_ce": 0.010816069319844246, + "loss_iou": 0.89453125, + "loss_num": 0.050048828125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 26361136, + "step": 398 + }, + { + "epoch": 0.0373473112743951, + "grad_norm": 11.96884822845459, + "learning_rate": 5e-05, + "loss": 2.2064, + "num_input_tokens_seen": 26428544, + "step": 399 + }, + { + "epoch": 0.0373473112743951, + "loss": 2.3468685150146484, + "loss_ce": 0.0070247529074549675, + "loss_iou": 1.015625, + "loss_num": 0.0634765625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 26428544, + "step": 399 + }, + { + "epoch": 0.03744091355829082, + "grad_norm": 13.7611722946167, + "learning_rate": 5e-05, + "loss": 2.1004, + "num_input_tokens_seen": 26495640, + "step": 400 + }, + { + "epoch": 0.03744091355829082, + "loss": 2.0225229263305664, + "loss_ce": 0.007874608039855957, + "loss_iou": 0.875, + "loss_num": 0.052490234375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 26495640, + "step": 400 + }, + { + "epoch": 0.03753451584218655, + "grad_norm": 83.84553527832031, + "learning_rate": 5e-05, + "loss": 1.7903, + "num_input_tokens_seen": 26561068, + "step": 401 + }, + { + "epoch": 0.03753451584218655, + "loss": 1.8230633735656738, + "loss_ce": 0.003727492643520236, + "loss_iou": 0.8046875, + "loss_num": 0.04248046875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 26561068, + "step": 401 + }, + { + "epoch": 0.03762811812608228, + "grad_norm": 19.10430145263672, + "learning_rate": 5e-05, + "loss": 1.9353, + "num_input_tokens_seen": 26626836, + "step": 402 + }, + { + "epoch": 0.03762811812608228, + "loss": 1.8824224472045898, + "loss_ce": 0.00174605508800596, + "loss_iou": 0.81640625, + "loss_num": 0.050048828125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 26626836, + "step": 402 + }, + { + "epoch": 0.037721720409978, + "grad_norm": 22.448139190673828, + "learning_rate": 5e-05, + "loss": 1.8249, + "num_input_tokens_seen": 26693028, + "step": 403 + }, + { + "epoch": 0.037721720409978, + "loss": 1.768705129623413, + "loss_ce": 0.0043009137734770775, + "loss_iou": 0.75390625, + "loss_num": 0.05126953125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 26693028, + "step": 403 + }, + { + "epoch": 0.03781532269387373, + "grad_norm": 10.823797225952148, + "learning_rate": 5e-05, + "loss": 2.4316, + "num_input_tokens_seen": 26759472, + "step": 404 + }, + { + "epoch": 0.03781532269387373, + "loss": 2.3532376289367676, + "loss_ce": 0.008022695779800415, + "loss_iou": 0.9375, + "loss_num": 0.09423828125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 26759472, + "step": 404 + }, + { + "epoch": 0.03790892497776946, + "grad_norm": 14.573873519897461, + "learning_rate": 5e-05, + "loss": 1.831, + "num_input_tokens_seen": 26825124, + "step": 405 + }, + { + "epoch": 0.03790892497776946, + "loss": 1.9456989765167236, + "loss_ce": 0.0074664149433374405, + "loss_iou": 0.8046875, + "loss_num": 0.0654296875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 26825124, + "step": 405 + }, + { + "epoch": 0.038002527261665184, + "grad_norm": 9.012618064880371, + "learning_rate": 5e-05, + "loss": 2.1544, + "num_input_tokens_seen": 26891976, + "step": 406 + }, + { + "epoch": 0.038002527261665184, + "loss": 2.2486791610717773, + "loss_ce": 0.007468043826520443, + "loss_iou": 0.9453125, + "loss_num": 0.0712890625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 26891976, + "step": 406 + }, + { + "epoch": 0.03809612954556091, + "grad_norm": 18.2127742767334, + "learning_rate": 5e-05, + "loss": 2.1004, + "num_input_tokens_seen": 26957404, + "step": 407 + }, + { + "epoch": 0.03809612954556091, + "loss": 2.109196186065674, + "loss_ce": 0.0047041852958500385, + "loss_iou": 0.8828125, + "loss_num": 0.0673828125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 26957404, + "step": 407 + }, + { + "epoch": 0.03818973182945664, + "grad_norm": 16.539289474487305, + "learning_rate": 5e-05, + "loss": 2.113, + "num_input_tokens_seen": 27022952, + "step": 408 + }, + { + "epoch": 0.03818973182945664, + "loss": 2.0526235103607178, + "loss_ce": 0.004039513412863016, + "loss_iou": 0.8515625, + "loss_num": 0.06982421875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 27022952, + "step": 408 + }, + { + "epoch": 0.038283334113352364, + "grad_norm": 9.839387893676758, + "learning_rate": 5e-05, + "loss": 2.0395, + "num_input_tokens_seen": 27089496, + "step": 409 + }, + { + "epoch": 0.038283334113352364, + "loss": 1.8633708953857422, + "loss_ce": 0.003019391791895032, + "loss_iou": 0.80859375, + "loss_num": 0.048828125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 27089496, + "step": 409 + }, + { + "epoch": 0.038376936397248096, + "grad_norm": 17.634851455688477, + "learning_rate": 5e-05, + "loss": 1.6128, + "num_input_tokens_seen": 27155404, + "step": 410 + }, + { + "epoch": 0.038376936397248096, + "loss": 1.4755079746246338, + "loss_ce": 0.0012646487448364496, + "loss_iou": 0.63671875, + "loss_num": 0.04052734375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 27155404, + "step": 410 + }, + { + "epoch": 0.03847053868114382, + "grad_norm": 11.373767852783203, + "learning_rate": 5e-05, + "loss": 2.2504, + "num_input_tokens_seen": 27222132, + "step": 411 + }, + { + "epoch": 0.03847053868114382, + "loss": 2.19195556640625, + "loss_ce": 0.005432123318314552, + "loss_iou": 0.953125, + "loss_num": 0.056640625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 27222132, + "step": 411 + }, + { + "epoch": 0.038564140965039545, + "grad_norm": 12.374197959899902, + "learning_rate": 5e-05, + "loss": 2.0242, + "num_input_tokens_seen": 27287484, + "step": 412 + }, + { + "epoch": 0.038564140965039545, + "loss": 2.068218469619751, + "loss_ce": 0.005718581844121218, + "loss_iou": 0.8515625, + "loss_num": 0.072265625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 27287484, + "step": 412 + }, + { + "epoch": 0.038657743248935277, + "grad_norm": 21.252538681030273, + "learning_rate": 5e-05, + "loss": 1.8521, + "num_input_tokens_seen": 27353856, + "step": 413 + }, + { + "epoch": 0.038657743248935277, + "loss": 1.5962870121002197, + "loss_ce": 0.00571082066744566, + "loss_iou": 0.68359375, + "loss_num": 0.044677734375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 27353856, + "step": 413 + }, + { + "epoch": 0.038751345532831, + "grad_norm": 10.730375289916992, + "learning_rate": 5e-05, + "loss": 2.4089, + "num_input_tokens_seen": 27419808, + "step": 414 + }, + { + "epoch": 0.038751345532831, + "loss": 2.4442930221557617, + "loss_ce": 0.0028866769280284643, + "loss_iou": 1.015625, + "loss_num": 0.0830078125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 27419808, + "step": 414 + }, + { + "epoch": 0.038844947816726726, + "grad_norm": 14.683554649353027, + "learning_rate": 5e-05, + "loss": 2.0207, + "num_input_tokens_seen": 27486244, + "step": 415 + }, + { + "epoch": 0.038844947816726726, + "loss": 1.9977325201034546, + "loss_ce": 0.01042783074080944, + "loss_iou": 0.8828125, + "loss_num": 0.04345703125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 27486244, + "step": 415 + }, + { + "epoch": 0.03893855010062246, + "grad_norm": 16.708295822143555, + "learning_rate": 5e-05, + "loss": 1.8847, + "num_input_tokens_seen": 27552460, + "step": 416 + }, + { + "epoch": 0.03893855010062246, + "loss": 1.726832389831543, + "loss_ce": 0.0036878606770187616, + "loss_iou": 0.73046875, + "loss_num": 0.0517578125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 27552460, + "step": 416 + }, + { + "epoch": 0.03903215238451818, + "grad_norm": 18.461673736572266, + "learning_rate": 5e-05, + "loss": 2.2866, + "num_input_tokens_seen": 27618188, + "step": 417 + }, + { + "epoch": 0.03903215238451818, + "loss": 2.2025556564331055, + "loss_ce": 0.0033370940946042538, + "loss_iou": 0.9375, + "loss_num": 0.064453125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 27618188, + "step": 417 + }, + { + "epoch": 0.039125754668413906, + "grad_norm": 17.931636810302734, + "learning_rate": 5e-05, + "loss": 1.8517, + "num_input_tokens_seen": 27685044, + "step": 418 + }, + { + "epoch": 0.039125754668413906, + "loss": 1.9908952713012695, + "loss_ce": 0.004567192401736975, + "loss_iou": 0.875, + "loss_num": 0.046630859375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 27685044, + "step": 418 + }, + { + "epoch": 0.03921935695230964, + "grad_norm": 23.1167049407959, + "learning_rate": 5e-05, + "loss": 2.1168, + "num_input_tokens_seen": 27750588, + "step": 419 + }, + { + "epoch": 0.03921935695230964, + "loss": 2.1894702911376953, + "loss_ce": 0.004900074098259211, + "loss_iou": 0.94140625, + "loss_num": 0.059326171875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 27750588, + "step": 419 + }, + { + "epoch": 0.03931295923620536, + "grad_norm": 15.535375595092773, + "learning_rate": 5e-05, + "loss": 2.5563, + "num_input_tokens_seen": 27817228, + "step": 420 + }, + { + "epoch": 0.03931295923620536, + "loss": 2.63668155670166, + "loss_ce": 0.0077751874923706055, + "loss_iou": 1.078125, + "loss_num": 0.09423828125, + "loss_xval": 2.625, + "num_input_tokens_seen": 27817228, + "step": 420 + }, + { + "epoch": 0.039406561520101094, + "grad_norm": 10.00205135345459, + "learning_rate": 5e-05, + "loss": 2.0777, + "num_input_tokens_seen": 27883472, + "step": 421 + }, + { + "epoch": 0.039406561520101094, + "loss": 2.1320810317993164, + "loss_ce": 0.0031747817993164062, + "loss_iou": 0.9140625, + "loss_num": 0.059814453125, + "loss_xval": 2.125, + "num_input_tokens_seen": 27883472, + "step": 421 + }, + { + "epoch": 0.03950016380399682, + "grad_norm": 14.320682525634766, + "learning_rate": 5e-05, + "loss": 2.0225, + "num_input_tokens_seen": 27949516, + "step": 422 + }, + { + "epoch": 0.03950016380399682, + "loss": 1.870326042175293, + "loss_ce": 0.0065565044060349464, + "loss_iou": 0.7265625, + "loss_num": 0.08203125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 27949516, + "step": 422 + }, + { + "epoch": 0.03959376608789254, + "grad_norm": 21.98247718811035, + "learning_rate": 5e-05, + "loss": 1.9695, + "num_input_tokens_seen": 28014724, + "step": 423 + }, + { + "epoch": 0.03959376608789254, + "loss": 2.2499985694885254, + "loss_ce": 0.004881343804299831, + "loss_iou": 0.9453125, + "loss_num": 0.0712890625, + "loss_xval": 2.25, + "num_input_tokens_seen": 28014724, + "step": 423 + }, + { + "epoch": 0.039687368371788274, + "grad_norm": 329.8367614746094, + "learning_rate": 5e-05, + "loss": 2.349, + "num_input_tokens_seen": 28081584, + "step": 424 + }, + { + "epoch": 0.039687368371788274, + "loss": 2.358642578125, + "loss_ce": 0.0031738688703626394, + "loss_iou": 0.9921875, + "loss_num": 0.07470703125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 28081584, + "step": 424 + }, + { + "epoch": 0.039780970655684, + "grad_norm": 10.797112464904785, + "learning_rate": 5e-05, + "loss": 2.1071, + "num_input_tokens_seen": 28147424, + "step": 425 + }, + { + "epoch": 0.039780970655684, + "loss": 2.0887904167175293, + "loss_ce": 0.006759242154657841, + "loss_iou": 0.87890625, + "loss_num": 0.064453125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 28147424, + "step": 425 + }, + { + "epoch": 0.039874572939579724, + "grad_norm": 10.683431625366211, + "learning_rate": 5e-05, + "loss": 1.9732, + "num_input_tokens_seen": 28214152, + "step": 426 + }, + { + "epoch": 0.039874572939579724, + "loss": 1.9705636501312256, + "loss_ce": 0.0037668293807655573, + "loss_iou": 0.83984375, + "loss_num": 0.057373046875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 28214152, + "step": 426 + }, + { + "epoch": 0.039968175223475455, + "grad_norm": 30.997591018676758, + "learning_rate": 5e-05, + "loss": 1.9126, + "num_input_tokens_seen": 28280776, + "step": 427 + }, + { + "epoch": 0.039968175223475455, + "loss": 1.9151159524917603, + "loss_ce": 0.004959717858582735, + "loss_iou": 0.796875, + "loss_num": 0.06396484375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 28280776, + "step": 427 + }, + { + "epoch": 0.04006177750737118, + "grad_norm": 20.698631286621094, + "learning_rate": 5e-05, + "loss": 2.2508, + "num_input_tokens_seen": 28346732, + "step": 428 + }, + { + "epoch": 0.04006177750737118, + "loss": 2.416332244873047, + "loss_ce": 0.004222821444272995, + "loss_iou": 1.0234375, + "loss_num": 0.072265625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 28346732, + "step": 428 + }, + { + "epoch": 0.040155379791266904, + "grad_norm": 458.5082092285156, + "learning_rate": 5e-05, + "loss": 1.7608, + "num_input_tokens_seen": 28412388, + "step": 429 + }, + { + "epoch": 0.040155379791266904, + "loss": 1.812030553817749, + "loss_ce": 0.001483736908994615, + "loss_iou": 0.7734375, + "loss_num": 0.05322265625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 28412388, + "step": 429 + }, + { + "epoch": 0.040248982075162636, + "grad_norm": 16.114900588989258, + "learning_rate": 5e-05, + "loss": 1.9463, + "num_input_tokens_seen": 28478760, + "step": 430 + }, + { + "epoch": 0.040248982075162636, + "loss": 1.9977428913116455, + "loss_ce": 0.0026255943812429905, + "loss_iou": 0.875, + "loss_num": 0.04931640625, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 28478760, + "step": 430 + }, + { + "epoch": 0.04034258435905836, + "grad_norm": 13.54692554473877, + "learning_rate": 5e-05, + "loss": 2.1163, + "num_input_tokens_seen": 28545792, + "step": 431 + }, + { + "epoch": 0.04034258435905836, + "loss": 2.117304801940918, + "loss_ce": 0.005976623855531216, + "loss_iou": 0.89453125, + "loss_num": 0.0654296875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 28545792, + "step": 431 + }, + { + "epoch": 0.040436186642954085, + "grad_norm": 19.964017868041992, + "learning_rate": 5e-05, + "loss": 1.8118, + "num_input_tokens_seen": 28611936, + "step": 432 + }, + { + "epoch": 0.040436186642954085, + "loss": 1.8202134370803833, + "loss_ce": 0.004295459948480129, + "loss_iou": 0.7265625, + "loss_num": 0.07177734375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 28611936, + "step": 432 + }, + { + "epoch": 0.040529788926849816, + "grad_norm": 13.176482200622559, + "learning_rate": 5e-05, + "loss": 2.2294, + "num_input_tokens_seen": 28678992, + "step": 433 + }, + { + "epoch": 0.040529788926849816, + "loss": 2.1980347633361816, + "loss_ce": 0.005651961546391249, + "loss_iou": 0.921875, + "loss_num": 0.0693359375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 28678992, + "step": 433 + }, + { + "epoch": 0.04062339121074554, + "grad_norm": 19.492185592651367, + "learning_rate": 5e-05, + "loss": 1.88, + "num_input_tokens_seen": 28745540, + "step": 434 + }, + { + "epoch": 0.04062339121074554, + "loss": 1.765675663948059, + "loss_ce": 0.0029803107026964426, + "loss_iou": 0.7109375, + "loss_num": 0.0673828125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 28745540, + "step": 434 + }, + { + "epoch": 0.04071699349464127, + "grad_norm": 10.830259323120117, + "learning_rate": 5e-05, + "loss": 1.8285, + "num_input_tokens_seen": 28812080, + "step": 435 + }, + { + "epoch": 0.04071699349464127, + "loss": 1.8863437175750732, + "loss_ce": 0.011343682184815407, + "loss_iou": 0.80859375, + "loss_num": 0.051513671875, + "loss_xval": 1.875, + "num_input_tokens_seen": 28812080, + "step": 435 + }, + { + "epoch": 0.040810595778537, + "grad_norm": 19.71128273010254, + "learning_rate": 5e-05, + "loss": 2.28, + "num_input_tokens_seen": 28879532, + "step": 436 + }, + { + "epoch": 0.040810595778537, + "loss": 2.296851396560669, + "loss_ce": 0.007788877934217453, + "loss_iou": 0.9921875, + "loss_num": 0.061767578125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 28879532, + "step": 436 + }, + { + "epoch": 0.04090419806243272, + "grad_norm": 35.30091094970703, + "learning_rate": 5e-05, + "loss": 2.5773, + "num_input_tokens_seen": 28946524, + "step": 437 + }, + { + "epoch": 0.04090419806243272, + "loss": 2.2610809803009033, + "loss_ce": 0.002291942248120904, + "loss_iou": 0.9609375, + "loss_num": 0.0673828125, + "loss_xval": 2.265625, + "num_input_tokens_seen": 28946524, + "step": 437 + }, + { + "epoch": 0.04099780034632845, + "grad_norm": 14.956010818481445, + "learning_rate": 5e-05, + "loss": 2.1915, + "num_input_tokens_seen": 29014132, + "step": 438 + }, + { + "epoch": 0.04099780034632845, + "loss": 2.039963483810425, + "loss_ce": 0.0057838065549731255, + "loss_iou": 0.8515625, + "loss_num": 0.06640625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 29014132, + "step": 438 + }, + { + "epoch": 0.04109140263022418, + "grad_norm": 10.634124755859375, + "learning_rate": 5e-05, + "loss": 1.9611, + "num_input_tokens_seen": 29081008, + "step": 439 + }, + { + "epoch": 0.04109140263022418, + "loss": 2.0453054904937744, + "loss_ce": 0.005754796788096428, + "loss_iou": 0.828125, + "loss_num": 0.0771484375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 29081008, + "step": 439 + }, + { + "epoch": 0.0411850049141199, + "grad_norm": 12.619905471801758, + "learning_rate": 5e-05, + "loss": 1.8783, + "num_input_tokens_seen": 29148312, + "step": 440 + }, + { + "epoch": 0.0411850049141199, + "loss": 1.8701744079589844, + "loss_ce": 0.004940034355968237, + "loss_iou": 0.8203125, + "loss_num": 0.04541015625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 29148312, + "step": 440 + }, + { + "epoch": 0.041278607198015634, + "grad_norm": 13.43472957611084, + "learning_rate": 5e-05, + "loss": 2.1071, + "num_input_tokens_seen": 29213756, + "step": 441 + }, + { + "epoch": 0.041278607198015634, + "loss": 2.1046857833862305, + "loss_ce": 0.002390938112512231, + "loss_iou": 0.86328125, + "loss_num": 0.0751953125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 29213756, + "step": 441 + }, + { + "epoch": 0.04137220948191136, + "grad_norm": 15.941155433654785, + "learning_rate": 5e-05, + "loss": 1.8279, + "num_input_tokens_seen": 29279772, + "step": 442 + }, + { + "epoch": 0.04137220948191136, + "loss": 1.9232045412063599, + "loss_ce": 0.0052358005195856094, + "loss_iou": 0.81640625, + "loss_num": 0.057861328125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 29279772, + "step": 442 + }, + { + "epoch": 0.04146581176580708, + "grad_norm": 28.43421745300293, + "learning_rate": 5e-05, + "loss": 2.2101, + "num_input_tokens_seen": 29346800, + "step": 443 + }, + { + "epoch": 0.04146581176580708, + "loss": 2.252653121948242, + "loss_ce": 0.006559483706951141, + "loss_iou": 1.015625, + "loss_num": 0.041259765625, + "loss_xval": 2.25, + "num_input_tokens_seen": 29346800, + "step": 443 + }, + { + "epoch": 0.041559414049702814, + "grad_norm": 13.325878143310547, + "learning_rate": 5e-05, + "loss": 2.6031, + "num_input_tokens_seen": 29413804, + "step": 444 + }, + { + "epoch": 0.041559414049702814, + "loss": 2.45267915725708, + "loss_ce": 0.005413481034338474, + "loss_iou": 1.0078125, + "loss_num": 0.08642578125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 29413804, + "step": 444 + }, + { + "epoch": 0.04165301633359854, + "grad_norm": 9.816612243652344, + "learning_rate": 5e-05, + "loss": 2.0703, + "num_input_tokens_seen": 29479516, + "step": 445 + }, + { + "epoch": 0.04165301633359854, + "loss": 1.9830279350280762, + "loss_ce": 0.007441938854753971, + "loss_iou": 0.76953125, + "loss_num": 0.087890625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 29479516, + "step": 445 + }, + { + "epoch": 0.04174661861749427, + "grad_norm": 15.803251266479492, + "learning_rate": 5e-05, + "loss": 2.0669, + "num_input_tokens_seen": 29546352, + "step": 446 + }, + { + "epoch": 0.04174661861749427, + "loss": 2.098733901977539, + "loss_ce": 0.006936904042959213, + "loss_iou": 0.87890625, + "loss_num": 0.06689453125, + "loss_xval": 2.09375, + "num_input_tokens_seen": 29546352, + "step": 446 + }, + { + "epoch": 0.041840220901389995, + "grad_norm": 22.669797897338867, + "learning_rate": 5e-05, + "loss": 2.1419, + "num_input_tokens_seen": 29613220, + "step": 447 + }, + { + "epoch": 0.041840220901389995, + "loss": 2.2310471534729004, + "loss_ce": 0.005461185239255428, + "loss_iou": 0.95703125, + "loss_num": 0.0625, + "loss_xval": 2.21875, + "num_input_tokens_seen": 29613220, + "step": 447 + }, + { + "epoch": 0.04193382318528572, + "grad_norm": 102.99943542480469, + "learning_rate": 5e-05, + "loss": 2.2529, + "num_input_tokens_seen": 29679504, + "step": 448 + }, + { + "epoch": 0.04193382318528572, + "loss": 2.233792543411255, + "loss_ce": 0.0023472788743674755, + "loss_iou": 0.94140625, + "loss_num": 0.06982421875, + "loss_xval": 2.234375, + "num_input_tokens_seen": 29679504, + "step": 448 + }, + { + "epoch": 0.04202742546918145, + "grad_norm": 13.938831329345703, + "learning_rate": 5e-05, + "loss": 2.0361, + "num_input_tokens_seen": 29746832, + "step": 449 + }, + { + "epoch": 0.04202742546918145, + "loss": 2.0519959926605225, + "loss_ce": 0.005120845511555672, + "loss_iou": 0.875, + "loss_num": 0.058837890625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 29746832, + "step": 449 + }, + { + "epoch": 0.042121027753077175, + "grad_norm": 17.788707733154297, + "learning_rate": 5e-05, + "loss": 2.2705, + "num_input_tokens_seen": 29812948, + "step": 450 + }, + { + "epoch": 0.042121027753077175, + "loss": 2.2611751556396484, + "loss_ce": 0.003362696385011077, + "loss_iou": 0.9453125, + "loss_num": 0.072265625, + "loss_xval": 2.25, + "num_input_tokens_seen": 29812948, + "step": 450 + }, + { + "epoch": 0.0422146300369729, + "grad_norm": 14.109766006469727, + "learning_rate": 5e-05, + "loss": 2.3016, + "num_input_tokens_seen": 29878596, + "step": 451 + }, + { + "epoch": 0.0422146300369729, + "loss": 2.1837148666381836, + "loss_ce": 0.0030509615316987038, + "loss_iou": 0.94140625, + "loss_num": 0.060546875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 29878596, + "step": 451 + }, + { + "epoch": 0.04230823232086863, + "grad_norm": 17.93331527709961, + "learning_rate": 5e-05, + "loss": 1.7842, + "num_input_tokens_seen": 29945328, + "step": 452 + }, + { + "epoch": 0.04230823232086863, + "loss": 1.9210729598999023, + "loss_ce": 0.003104252042248845, + "loss_iou": 0.85546875, + "loss_num": 0.0419921875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 29945328, + "step": 452 + }, + { + "epoch": 0.042401834604764356, + "grad_norm": 19.0549373626709, + "learning_rate": 5e-05, + "loss": 1.7662, + "num_input_tokens_seen": 30011836, + "step": 453 + }, + { + "epoch": 0.042401834604764356, + "loss": 1.575378179550171, + "loss_ce": 0.005798084661364555, + "loss_iou": 0.6640625, + "loss_num": 0.047607421875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 30011836, + "step": 453 + }, + { + "epoch": 0.04249543688866008, + "grad_norm": 17.29096221923828, + "learning_rate": 5e-05, + "loss": 2.1239, + "num_input_tokens_seen": 30077988, + "step": 454 + }, + { + "epoch": 0.04249543688866008, + "loss": 1.9790494441986084, + "loss_ce": 0.002486966550350189, + "loss_iou": 0.8515625, + "loss_num": 0.0537109375, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 30077988, + "step": 454 + }, + { + "epoch": 0.04258903917255581, + "grad_norm": 20.69436264038086, + "learning_rate": 5e-05, + "loss": 1.9971, + "num_input_tokens_seen": 30143916, + "step": 455 + }, + { + "epoch": 0.04258903917255581, + "loss": 1.8437339067459106, + "loss_ce": 0.005843220744282007, + "loss_iou": 0.75, + "loss_num": 0.0673828125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 30143916, + "step": 455 + }, + { + "epoch": 0.04268264145645154, + "grad_norm": 113.77708435058594, + "learning_rate": 5e-05, + "loss": 1.7839, + "num_input_tokens_seen": 30209432, + "step": 456 + }, + { + "epoch": 0.04268264145645154, + "loss": 1.7519501447677612, + "loss_ce": 0.008541999384760857, + "loss_iou": 0.671875, + "loss_num": 0.0791015625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 30209432, + "step": 456 + }, + { + "epoch": 0.04277624374034726, + "grad_norm": 13.520759582519531, + "learning_rate": 5e-05, + "loss": 1.9484, + "num_input_tokens_seen": 30275356, + "step": 457 + }, + { + "epoch": 0.04277624374034726, + "loss": 1.990700364112854, + "loss_ce": 0.005348884034901857, + "loss_iou": 0.84765625, + "loss_num": 0.0576171875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 30275356, + "step": 457 + }, + { + "epoch": 0.04286984602424299, + "grad_norm": 19.00589370727539, + "learning_rate": 5e-05, + "loss": 1.731, + "num_input_tokens_seen": 30341892, + "step": 458 + }, + { + "epoch": 0.04286984602424299, + "loss": 1.6080878973007202, + "loss_ce": 0.0018257052870467305, + "loss_iou": 0.703125, + "loss_num": 0.039306640625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 30341892, + "step": 458 + }, + { + "epoch": 0.04296344830813872, + "grad_norm": 12.291425704956055, + "learning_rate": 5e-05, + "loss": 2.3168, + "num_input_tokens_seen": 30408404, + "step": 459 + }, + { + "epoch": 0.04296344830813872, + "loss": 2.277008056640625, + "loss_ce": 0.0055235326290130615, + "loss_iou": 0.95703125, + "loss_num": 0.07177734375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 30408404, + "step": 459 + }, + { + "epoch": 0.04305705059203445, + "grad_norm": 31.746538162231445, + "learning_rate": 5e-05, + "loss": 2.1224, + "num_input_tokens_seen": 30474776, + "step": 460 + }, + { + "epoch": 0.04305705059203445, + "loss": 2.1912577152252197, + "loss_ce": 0.0027812570333480835, + "loss_iou": 0.9140625, + "loss_num": 0.07373046875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 30474776, + "step": 460 + }, + { + "epoch": 0.04315065287593017, + "grad_norm": 12.762518882751465, + "learning_rate": 5e-05, + "loss": 1.9152, + "num_input_tokens_seen": 30540476, + "step": 461 + }, + { + "epoch": 0.04315065287593017, + "loss": 1.9707589149475098, + "loss_ce": 0.0049386098980903625, + "loss_iou": 0.84375, + "loss_num": 0.055908203125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 30540476, + "step": 461 + }, + { + "epoch": 0.0432442551598259, + "grad_norm": 15.633072853088379, + "learning_rate": 5e-05, + "loss": 2.0971, + "num_input_tokens_seen": 30607604, + "step": 462 + }, + { + "epoch": 0.0432442551598259, + "loss": 2.1641714572906494, + "loss_ce": 0.005968406796455383, + "loss_iou": 0.9375, + "loss_num": 0.057373046875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 30607604, + "step": 462 + }, + { + "epoch": 0.04333785744372163, + "grad_norm": 16.27423858642578, + "learning_rate": 5e-05, + "loss": 1.9722, + "num_input_tokens_seen": 30673660, + "step": 463 + }, + { + "epoch": 0.04333785744372163, + "loss": 1.743199110031128, + "loss_ce": 0.002476481255143881, + "loss_iou": 0.734375, + "loss_num": 0.053955078125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 30673660, + "step": 463 + }, + { + "epoch": 0.043431459727617354, + "grad_norm": 23.577835083007812, + "learning_rate": 5e-05, + "loss": 2.155, + "num_input_tokens_seen": 30741112, + "step": 464 + }, + { + "epoch": 0.043431459727617354, + "loss": 2.149505138397217, + "loss_ce": 0.0049738697707653046, + "loss_iou": 0.9140625, + "loss_num": 0.0634765625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 30741112, + "step": 464 + }, + { + "epoch": 0.04352506201151308, + "grad_norm": 9.723146438598633, + "learning_rate": 5e-05, + "loss": 1.7155, + "num_input_tokens_seen": 30806992, + "step": 465 + }, + { + "epoch": 0.04352506201151308, + "loss": 1.694063663482666, + "loss_ce": 0.004122295416891575, + "loss_iou": 0.703125, + "loss_num": 0.0556640625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 30806992, + "step": 465 + }, + { + "epoch": 0.04361866429540881, + "grad_norm": 12.820768356323242, + "learning_rate": 5e-05, + "loss": 2.1488, + "num_input_tokens_seen": 30872864, + "step": 466 + }, + { + "epoch": 0.04361866429540881, + "loss": 2.2483158111572266, + "loss_ce": 0.007104871328920126, + "loss_iou": 0.9609375, + "loss_num": 0.06396484375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 30872864, + "step": 466 + }, + { + "epoch": 0.043712266579304535, + "grad_norm": 23.971820831298828, + "learning_rate": 5e-05, + "loss": 2.2016, + "num_input_tokens_seen": 30938752, + "step": 467 + }, + { + "epoch": 0.043712266579304535, + "loss": 2.1699142456054688, + "loss_ce": 0.00585184246301651, + "loss_iou": 0.921875, + "loss_num": 0.06298828125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 30938752, + "step": 467 + }, + { + "epoch": 0.04380586886320026, + "grad_norm": 7.219677448272705, + "learning_rate": 5e-05, + "loss": 2.2216, + "num_input_tokens_seen": 31004860, + "step": 468 + }, + { + "epoch": 0.04380586886320026, + "loss": 2.246121883392334, + "loss_ce": 0.007840558886528015, + "loss_iou": 0.9375, + "loss_num": 0.07177734375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 31004860, + "step": 468 + }, + { + "epoch": 0.04389947114709599, + "grad_norm": 9.824893951416016, + "learning_rate": 5e-05, + "loss": 2.1411, + "num_input_tokens_seen": 31070460, + "step": 469 + }, + { + "epoch": 0.04389947114709599, + "loss": 2.093827724456787, + "loss_ce": 0.003007214516401291, + "loss_iou": 0.875, + "loss_num": 0.06884765625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 31070460, + "step": 469 + }, + { + "epoch": 0.043993073430991715, + "grad_norm": 13.35690689086914, + "learning_rate": 5e-05, + "loss": 2.1013, + "num_input_tokens_seen": 31136604, + "step": 470 + }, + { + "epoch": 0.043993073430991715, + "loss": 2.2541399002075195, + "loss_ce": 0.009999187663197517, + "loss_iou": 0.9453125, + "loss_num": 0.07177734375, + "loss_xval": 2.25, + "num_input_tokens_seen": 31136604, + "step": 470 + }, + { + "epoch": 0.04408667571488745, + "grad_norm": 10.634860038757324, + "learning_rate": 5e-05, + "loss": 1.8709, + "num_input_tokens_seen": 31202396, + "step": 471 + }, + { + "epoch": 0.04408667571488745, + "loss": 1.7519464492797852, + "loss_ce": 0.0038995440118014812, + "loss_iou": 0.7578125, + "loss_num": 0.046630859375, + "loss_xval": 1.75, + "num_input_tokens_seen": 31202396, + "step": 471 + }, + { + "epoch": 0.04418027799878317, + "grad_norm": 24.335004806518555, + "learning_rate": 5e-05, + "loss": 2.0886, + "num_input_tokens_seen": 31269104, + "step": 472 + }, + { + "epoch": 0.04418027799878317, + "loss": 2.0880887508392334, + "loss_ce": 0.006057538092136383, + "loss_iou": 0.890625, + "loss_num": 0.059814453125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 31269104, + "step": 472 + }, + { + "epoch": 0.044273880282678896, + "grad_norm": 25.776592254638672, + "learning_rate": 5e-05, + "loss": 2.0023, + "num_input_tokens_seen": 31335244, + "step": 473 + }, + { + "epoch": 0.044273880282678896, + "loss": 1.954072117805481, + "loss_ce": 0.0029002828523516655, + "loss_iou": 0.86328125, + "loss_num": 0.044677734375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 31335244, + "step": 473 + }, + { + "epoch": 0.04436748256657463, + "grad_norm": 9.409109115600586, + "learning_rate": 5e-05, + "loss": 2.4935, + "num_input_tokens_seen": 31401428, + "step": 474 + }, + { + "epoch": 0.04436748256657463, + "loss": 2.5019659996032715, + "loss_ce": 0.0019660217221826315, + "loss_iou": 1.0625, + "loss_num": 0.0771484375, + "loss_xval": 2.5, + "num_input_tokens_seen": 31401428, + "step": 474 + }, + { + "epoch": 0.04446108485047035, + "grad_norm": 16.741493225097656, + "learning_rate": 5e-05, + "loss": 2.2775, + "num_input_tokens_seen": 31468948, + "step": 475 + }, + { + "epoch": 0.04446108485047035, + "loss": 2.27400541305542, + "loss_ce": 0.005450926721096039, + "loss_iou": 0.94921875, + "loss_num": 0.07421875, + "loss_xval": 2.265625, + "num_input_tokens_seen": 31468948, + "step": 475 + }, + { + "epoch": 0.044554687134366076, + "grad_norm": 8.939950942993164, + "learning_rate": 5e-05, + "loss": 2.0622, + "num_input_tokens_seen": 31536260, + "step": 476 + }, + { + "epoch": 0.044554687134366076, + "loss": 2.106278419494629, + "loss_ce": 0.006669181399047375, + "loss_iou": 0.87109375, + "loss_num": 0.07080078125, + "loss_xval": 2.09375, + "num_input_tokens_seen": 31536260, + "step": 476 + }, + { + "epoch": 0.04464828941826181, + "grad_norm": 20.194087982177734, + "learning_rate": 5e-05, + "loss": 1.9375, + "num_input_tokens_seen": 31602536, + "step": 477 + }, + { + "epoch": 0.04464828941826181, + "loss": 1.848040223121643, + "loss_ce": 0.007219946011900902, + "loss_iou": 0.7734375, + "loss_num": 0.05810546875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 31602536, + "step": 477 + }, + { + "epoch": 0.04474189170215753, + "grad_norm": 12.22110652923584, + "learning_rate": 5e-05, + "loss": 2.287, + "num_input_tokens_seen": 31668696, + "step": 478 + }, + { + "epoch": 0.04474189170215753, + "loss": 2.145697593688965, + "loss_ce": 0.004096093587577343, + "loss_iou": 0.91796875, + "loss_num": 0.061279296875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 31668696, + "step": 478 + }, + { + "epoch": 0.04483549398605326, + "grad_norm": 12.94542407989502, + "learning_rate": 5e-05, + "loss": 2.0049, + "num_input_tokens_seen": 31734556, + "step": 479 + }, + { + "epoch": 0.04483549398605326, + "loss": 1.8130786418914795, + "loss_ce": 0.007536758668720722, + "loss_iou": 0.765625, + "loss_num": 0.055908203125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 31734556, + "step": 479 + }, + { + "epoch": 0.04492909626994899, + "grad_norm": 20.849462509155273, + "learning_rate": 5e-05, + "loss": 2.0953, + "num_input_tokens_seen": 31801108, + "step": 480 + }, + { + "epoch": 0.04492909626994899, + "loss": 2.0138297080993652, + "loss_ce": 0.004063955508172512, + "loss_iou": 0.83984375, + "loss_num": 0.06640625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 31801108, + "step": 480 + }, + { + "epoch": 0.04502269855384471, + "grad_norm": 21.251911163330078, + "learning_rate": 5e-05, + "loss": 2.0417, + "num_input_tokens_seen": 31868384, + "step": 481 + }, + { + "epoch": 0.04502269855384471, + "loss": 2.1381373405456543, + "loss_ce": 0.0014183730818331242, + "loss_iou": 0.875, + "loss_num": 0.0771484375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 31868384, + "step": 481 + }, + { + "epoch": 0.04511630083774044, + "grad_norm": 23.895750045776367, + "learning_rate": 5e-05, + "loss": 2.0814, + "num_input_tokens_seen": 31934348, + "step": 482 + }, + { + "epoch": 0.04511630083774044, + "loss": 2.180201530456543, + "loss_ce": 0.008936937898397446, + "loss_iou": 0.8984375, + "loss_num": 0.07421875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 31934348, + "step": 482 + }, + { + "epoch": 0.04520990312163617, + "grad_norm": 13.015569686889648, + "learning_rate": 5e-05, + "loss": 2.3738, + "num_input_tokens_seen": 31999700, + "step": 483 + }, + { + "epoch": 0.04520990312163617, + "loss": 2.3209519386291504, + "loss_ce": 0.007475219201296568, + "loss_iou": 0.91796875, + "loss_num": 0.09521484375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 31999700, + "step": 483 + }, + { + "epoch": 0.045303505405531894, + "grad_norm": 35.630584716796875, + "learning_rate": 5e-05, + "loss": 2.3771, + "num_input_tokens_seen": 32065160, + "step": 484 + }, + { + "epoch": 0.045303505405531894, + "loss": 2.546733856201172, + "loss_ce": 0.005718107335269451, + "loss_iou": 1.0078125, + "loss_num": 0.10595703125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 32065160, + "step": 484 + }, + { + "epoch": 0.045397107689427625, + "grad_norm": 212.5460205078125, + "learning_rate": 5e-05, + "loss": 2.0635, + "num_input_tokens_seen": 32131324, + "step": 485 + }, + { + "epoch": 0.045397107689427625, + "loss": 1.9449450969696045, + "loss_ce": 0.00939822755753994, + "loss_iou": 0.75390625, + "loss_num": 0.0859375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 32131324, + "step": 485 + }, + { + "epoch": 0.04549070997332335, + "grad_norm": 14.1449556350708, + "learning_rate": 5e-05, + "loss": 1.8435, + "num_input_tokens_seen": 32196900, + "step": 486 + }, + { + "epoch": 0.04549070997332335, + "loss": 1.8770546913146973, + "loss_ce": 0.005228529218584299, + "loss_iou": 0.77734375, + "loss_num": 0.0625, + "loss_xval": 1.875, + "num_input_tokens_seen": 32196900, + "step": 486 + }, + { + "epoch": 0.045584312257219074, + "grad_norm": 12.691750526428223, + "learning_rate": 5e-05, + "loss": 2.0293, + "num_input_tokens_seen": 32262384, + "step": 487 + }, + { + "epoch": 0.045584312257219074, + "loss": 2.024717330932617, + "loss_ce": 0.006162787787616253, + "loss_iou": 0.8203125, + "loss_num": 0.0751953125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 32262384, + "step": 487 + }, + { + "epoch": 0.045677914541114806, + "grad_norm": 15.173015594482422, + "learning_rate": 5e-05, + "loss": 1.8394, + "num_input_tokens_seen": 32328628, + "step": 488 + }, + { + "epoch": 0.045677914541114806, + "loss": 1.748635172843933, + "loss_ce": 0.01023173052817583, + "loss_iou": 0.734375, + "loss_num": 0.0537109375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 32328628, + "step": 488 + }, + { + "epoch": 0.04577151682501053, + "grad_norm": 19.01584243774414, + "learning_rate": 5e-05, + "loss": 2.1774, + "num_input_tokens_seen": 32395556, + "step": 489 + }, + { + "epoch": 0.04577151682501053, + "loss": 2.153399705886841, + "loss_ce": 0.006915212143212557, + "loss_iou": 0.89453125, + "loss_num": 0.0712890625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 32395556, + "step": 489 + }, + { + "epoch": 0.045865119108906255, + "grad_norm": 11.831512451171875, + "learning_rate": 5e-05, + "loss": 2.1523, + "num_input_tokens_seen": 32461724, + "step": 490 + }, + { + "epoch": 0.045865119108906255, + "loss": 2.3882484436035156, + "loss_ce": 0.0025062626227736473, + "loss_iou": 0.98828125, + "loss_num": 0.0810546875, + "loss_xval": 2.390625, + "num_input_tokens_seen": 32461724, + "step": 490 + }, + { + "epoch": 0.045958721392801986, + "grad_norm": 15.672270774841309, + "learning_rate": 5e-05, + "loss": 1.829, + "num_input_tokens_seen": 32527456, + "step": 491 + }, + { + "epoch": 0.045958721392801986, + "loss": 1.8705118894577026, + "loss_ce": 0.005277492105960846, + "loss_iou": 0.78125, + "loss_num": 0.060791015625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 32527456, + "step": 491 + }, + { + "epoch": 0.04605232367669771, + "grad_norm": 21.85772705078125, + "learning_rate": 5e-05, + "loss": 2.075, + "num_input_tokens_seen": 32595248, + "step": 492 + }, + { + "epoch": 0.04605232367669771, + "loss": 1.899980068206787, + "loss_ce": 0.006425441242754459, + "loss_iou": 0.828125, + "loss_num": 0.047119140625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 32595248, + "step": 492 + }, + { + "epoch": 0.046145925960593435, + "grad_norm": 21.907089233398438, + "learning_rate": 5e-05, + "loss": 2.0562, + "num_input_tokens_seen": 32661588, + "step": 493 + }, + { + "epoch": 0.046145925960593435, + "loss": 2.0572381019592285, + "loss_ce": 0.007433369755744934, + "loss_iou": 0.84765625, + "loss_num": 0.07080078125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 32661588, + "step": 493 + }, + { + "epoch": 0.04623952824448917, + "grad_norm": 13.75629997253418, + "learning_rate": 5e-05, + "loss": 2.0423, + "num_input_tokens_seen": 32727640, + "step": 494 + }, + { + "epoch": 0.04623952824448917, + "loss": 2.1492695808410645, + "loss_ce": 0.0027852682396769524, + "loss_iou": 0.9140625, + "loss_num": 0.0625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 32727640, + "step": 494 + }, + { + "epoch": 0.04633313052838489, + "grad_norm": 29.714677810668945, + "learning_rate": 5e-05, + "loss": 2.2401, + "num_input_tokens_seen": 32792568, + "step": 495 + }, + { + "epoch": 0.04633313052838489, + "loss": 2.181577444076538, + "loss_ce": 0.004819741006940603, + "loss_iou": 0.921875, + "loss_num": 0.06689453125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 32792568, + "step": 495 + }, + { + "epoch": 0.04642673281228062, + "grad_norm": 19.96394920349121, + "learning_rate": 5e-05, + "loss": 2.0922, + "num_input_tokens_seen": 32859600, + "step": 496 + }, + { + "epoch": 0.04642673281228062, + "loss": 2.0059657096862793, + "loss_ce": 0.003035774687305093, + "loss_iou": 0.828125, + "loss_num": 0.068359375, + "loss_xval": 2.0, + "num_input_tokens_seen": 32859600, + "step": 496 + }, + { + "epoch": 0.04652033509617635, + "grad_norm": 21.029605865478516, + "learning_rate": 5e-05, + "loss": 2.185, + "num_input_tokens_seen": 32924776, + "step": 497 + }, + { + "epoch": 0.04652033509617635, + "loss": 2.2143352031707764, + "loss_ce": 0.006327410228550434, + "loss_iou": 0.95703125, + "loss_num": 0.0595703125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 32924776, + "step": 497 + }, + { + "epoch": 0.04661393738007207, + "grad_norm": 14.5270414352417, + "learning_rate": 5e-05, + "loss": 1.9225, + "num_input_tokens_seen": 32991020, + "step": 498 + }, + { + "epoch": 0.04661393738007207, + "loss": 2.088118076324463, + "loss_ce": 0.00901655200868845, + "loss_iou": 0.88671875, + "loss_num": 0.06103515625, + "loss_xval": 2.078125, + "num_input_tokens_seen": 32991020, + "step": 498 + }, + { + "epoch": 0.046707539663967804, + "grad_norm": 13.652289390563965, + "learning_rate": 5e-05, + "loss": 1.8995, + "num_input_tokens_seen": 33056636, + "step": 499 + }, + { + "epoch": 0.046707539663967804, + "loss": 1.8197847604751587, + "loss_ce": 0.009237861260771751, + "loss_iou": 0.7578125, + "loss_num": 0.05908203125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 33056636, + "step": 499 + }, + { + "epoch": 0.04680114194786353, + "grad_norm": 14.034210205078125, + "learning_rate": 5e-05, + "loss": 1.7604, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "eval_seeclick_CIoU": 0.010791782289743423, + "eval_seeclick_GIoU": -0.034255435690283775, + "eval_seeclick_IoU": 0.1710895374417305, + "eval_seeclick_MAE_all": 0.15610893070697784, + "eval_seeclick_MAE_h": 0.14781776815652847, + "eval_seeclick_MAE_w": 0.14510643482208252, + "eval_seeclick_MAE_x_boxes": 0.22438225895166397, + "eval_seeclick_MAE_y_boxes": 0.14794477075338364, + "eval_seeclick_NUM_probability": 0.9976383745670319, + "eval_seeclick_inside_bbox": 0.20937500149011612, + "eval_seeclick_loss": 2.8477022647857666, + "eval_seeclick_loss_ce": 0.014594110660254955, + "eval_seeclick_loss_iou": 1.03955078125, + "eval_seeclick_loss_num": 0.1512451171875, + "eval_seeclick_loss_xval": 2.8349609375, + "eval_seeclick_runtime": 61.9813, + "eval_seeclick_samples_per_second": 0.758, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "eval_icons_CIoU": -0.18846502900123596, + "eval_icons_GIoU": -0.17556016892194748, + "eval_icons_IoU": 0.010889915749430656, + "eval_icons_MAE_all": 0.21332991123199463, + "eval_icons_MAE_h": 0.21519098430871964, + "eval_icons_MAE_w": 0.25561511516571045, + "eval_icons_MAE_x_boxes": 0.1543479636311531, + "eval_icons_MAE_y_boxes": 0.12868855893611908, + "eval_icons_NUM_probability": 0.9995498061180115, + "eval_icons_inside_bbox": 0.05902777798473835, + "eval_icons_loss": 3.4127511978149414, + "eval_icons_loss_ce": 0.0002681785117601976, + "eval_icons_loss_iou": 1.17431640625, + "eval_icons_loss_num": 0.21893310546875, + "eval_icons_loss_xval": 3.4443359375, + "eval_icons_runtime": 64.3223, + "eval_icons_samples_per_second": 0.777, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "eval_screenspot_CIoU": -0.0015938772509495418, + "eval_screenspot_GIoU": -0.02167066124578317, + "eval_screenspot_IoU": 0.16240592549244562, + "eval_screenspot_MAE_all": 0.1384421413143476, + "eval_screenspot_MAE_h": 0.11245785405238469, + "eval_screenspot_MAE_w": 0.16129297018051147, + "eval_screenspot_MAE_x_boxes": 0.1720647563536962, + "eval_screenspot_MAE_y_boxes": 0.09524397552013397, + "eval_screenspot_NUM_probability": 0.9993804494539896, + "eval_screenspot_inside_bbox": 0.420416663090388, + "eval_screenspot_loss": 2.786745309829712, + "eval_screenspot_loss_ce": 0.011604713276028633, + "eval_screenspot_loss_iou": 1.0431315104166667, + "eval_screenspot_loss_num": 0.14615885416666666, + "eval_screenspot_loss_xval": 2.8170572916666665, + "eval_screenspot_runtime": 128.2233, + "eval_screenspot_samples_per_second": 0.694, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "eval_compot_CIoU": -0.07110186293721199, + "eval_compot_GIoU": -0.06726318597793579, + "eval_compot_IoU": 0.08910071477293968, + "eval_compot_MAE_all": 0.10658146440982819, + "eval_compot_MAE_h": 0.08906470611691475, + "eval_compot_MAE_w": 0.11970876902341843, + "eval_compot_MAE_x_boxes": 0.11287350952625275, + "eval_compot_MAE_y_boxes": 0.09081846103072166, + "eval_compot_NUM_probability": 0.9994225800037384, + "eval_compot_inside_bbox": 0.2326388955116272, + "eval_compot_loss": 2.6506786346435547, + "eval_compot_loss_ce": 0.004752482753247023, + "eval_compot_loss_iou": 1.05419921875, + "eval_compot_loss_num": 0.1040191650390625, + "eval_compot_loss_xval": 2.62841796875, + "eval_compot_runtime": 74.8159, + "eval_compot_samples_per_second": 0.668, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "eval_custom_ui_MAE_all": 0.16714157909154892, + "eval_custom_ui_MAE_x": 0.12132188677787781, + "eval_custom_ui_MAE_y": 0.21296124905347824, + "eval_custom_ui_NUM_probability": 0.996900349855423, + "eval_custom_ui_loss": 0.8348149061203003, + "eval_custom_ui_loss_ce": 0.017060188576579094, + "eval_custom_ui_loss_num": 0.163848876953125, + "eval_custom_ui_loss_xval": 0.819091796875, + "eval_custom_ui_runtime": 50.998, + "eval_custom_ui_samples_per_second": 0.98, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04680114194786353, + "loss": 0.8233832120895386, + "loss_ce": 0.018695715814828873, + "loss_iou": 0.0, + "loss_num": 0.1611328125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 33122848, + "step": 500 + }, + { + "epoch": 0.04689474423175925, + "grad_norm": 13.490910530090332, + "learning_rate": 5e-05, + "loss": 2.1551, + "num_input_tokens_seen": 33189200, + "step": 501 + }, + { + "epoch": 0.04689474423175925, + "loss": 2.0641427040100098, + "loss_ce": 0.008478588424623013, + "loss_iou": 0.8828125, + "loss_num": 0.0576171875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 33189200, + "step": 501 + }, + { + "epoch": 0.046988346515654984, + "grad_norm": 15.705336570739746, + "learning_rate": 5e-05, + "loss": 1.9723, + "num_input_tokens_seen": 33255100, + "step": 502 + }, + { + "epoch": 0.046988346515654984, + "loss": 1.782179832458496, + "loss_ce": 0.007155276834964752, + "loss_iou": 0.7421875, + "loss_num": 0.058349609375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 33255100, + "step": 502 + }, + { + "epoch": 0.04708194879955071, + "grad_norm": 60.570655822753906, + "learning_rate": 5e-05, + "loss": 2.112, + "num_input_tokens_seen": 33321280, + "step": 503 + }, + { + "epoch": 0.04708194879955071, + "loss": 2.0688400268554688, + "loss_ce": 0.008293201215565205, + "loss_iou": 0.890625, + "loss_num": 0.054931640625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 33321280, + "step": 503 + }, + { + "epoch": 0.04717555108344643, + "grad_norm": 16.53862762451172, + "learning_rate": 5e-05, + "loss": 2.4633, + "num_input_tokens_seen": 33386876, + "step": 504 + }, + { + "epoch": 0.04717555108344643, + "loss": 2.282855987548828, + "loss_ce": 0.008686106652021408, + "loss_iou": 0.8828125, + "loss_num": 0.10205078125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 33386876, + "step": 504 + }, + { + "epoch": 0.047269153367342165, + "grad_norm": 20.333419799804688, + "learning_rate": 5e-05, + "loss": 2.1628, + "num_input_tokens_seen": 33454072, + "step": 505 + }, + { + "epoch": 0.047269153367342165, + "loss": 2.0465087890625, + "loss_ce": 0.003540055826306343, + "loss_iou": 0.88671875, + "loss_num": 0.05322265625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 33454072, + "step": 505 + }, + { + "epoch": 0.04736275565123789, + "grad_norm": 30.461502075195312, + "learning_rate": 5e-05, + "loss": 1.7795, + "num_input_tokens_seen": 33520064, + "step": 506 + }, + { + "epoch": 0.04736275565123789, + "loss": 2.113797664642334, + "loss_ce": 0.005399085581302643, + "loss_iou": 0.8984375, + "loss_num": 0.062255859375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 33520064, + "step": 506 + }, + { + "epoch": 0.047456357935133614, + "grad_norm": 19.000221252441406, + "learning_rate": 5e-05, + "loss": 2.1664, + "num_input_tokens_seen": 33586816, + "step": 507 + }, + { + "epoch": 0.047456357935133614, + "loss": 2.2077221870422363, + "loss_ce": 0.008503537625074387, + "loss_iou": 0.91796875, + "loss_num": 0.07373046875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 33586816, + "step": 507 + }, + { + "epoch": 0.047549960219029345, + "grad_norm": 21.011343002319336, + "learning_rate": 5e-05, + "loss": 2.1269, + "num_input_tokens_seen": 33652068, + "step": 508 + }, + { + "epoch": 0.047549960219029345, + "loss": 2.2789878845214844, + "loss_ce": 0.007503424771130085, + "loss_iou": 0.92578125, + "loss_num": 0.08447265625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 33652068, + "step": 508 + }, + { + "epoch": 0.04764356250292507, + "grad_norm": 47.962913513183594, + "learning_rate": 5e-05, + "loss": 2.1327, + "num_input_tokens_seen": 33717020, + "step": 509 + }, + { + "epoch": 0.04764356250292507, + "loss": 2.3316895961761475, + "loss_ce": 0.007470785174518824, + "loss_iou": 0.96875, + "loss_num": 0.07666015625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 33717020, + "step": 509 + }, + { + "epoch": 0.0477371647868208, + "grad_norm": 11.047908782958984, + "learning_rate": 5e-05, + "loss": 2.0486, + "num_input_tokens_seen": 33783980, + "step": 510 + }, + { + "epoch": 0.0477371647868208, + "loss": 2.0446388721466064, + "loss_ce": 0.00362326018512249, + "loss_iou": 0.8515625, + "loss_num": 0.06884765625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 33783980, + "step": 510 + }, + { + "epoch": 0.047830767070716526, + "grad_norm": 29.023284912109375, + "learning_rate": 5e-05, + "loss": 1.9636, + "num_input_tokens_seen": 33850420, + "step": 511 + }, + { + "epoch": 0.047830767070716526, + "loss": 1.8442463874816895, + "loss_ce": 0.010872384533286095, + "loss_iou": 0.76953125, + "loss_num": 0.057861328125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 33850420, + "step": 511 + }, + { + "epoch": 0.04792436935461225, + "grad_norm": 11.395902633666992, + "learning_rate": 5e-05, + "loss": 2.2705, + "num_input_tokens_seen": 33916568, + "step": 512 + }, + { + "epoch": 0.04792436935461225, + "loss": 2.2476541996002197, + "loss_ce": 0.007419949397444725, + "loss_iou": 0.9296875, + "loss_num": 0.076171875, + "loss_xval": 2.234375, + "num_input_tokens_seen": 33916568, + "step": 512 + }, + { + "epoch": 0.04801797163850798, + "grad_norm": 14.564925193786621, + "learning_rate": 5e-05, + "loss": 2.3737, + "num_input_tokens_seen": 33981932, + "step": 513 + }, + { + "epoch": 0.04801797163850798, + "loss": 2.4528675079345703, + "loss_ce": 0.011461199261248112, + "loss_iou": 0.99609375, + "loss_num": 0.08935546875, + "loss_xval": 2.4375, + "num_input_tokens_seen": 33981932, + "step": 513 + }, + { + "epoch": 0.04811157392240371, + "grad_norm": 26.9398193359375, + "learning_rate": 5e-05, + "loss": 1.8815, + "num_input_tokens_seen": 34048428, + "step": 514 + }, + { + "epoch": 0.04811157392240371, + "loss": 1.838984489440918, + "loss_ce": 0.006953159347176552, + "loss_iou": 0.77734375, + "loss_num": 0.05517578125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 34048428, + "step": 514 + }, + { + "epoch": 0.04820517620629943, + "grad_norm": 50.43394088745117, + "learning_rate": 5e-05, + "loss": 2.2906, + "num_input_tokens_seen": 34114968, + "step": 515 + }, + { + "epoch": 0.04820517620629943, + "loss": 2.0050594806671143, + "loss_ce": 0.006036060396581888, + "loss_iou": 0.86328125, + "loss_num": 0.05419921875, + "loss_xval": 2.0, + "num_input_tokens_seen": 34114968, + "step": 515 + }, + { + "epoch": 0.04829877849019516, + "grad_norm": 11.556628227233887, + "learning_rate": 5e-05, + "loss": 1.8818, + "num_input_tokens_seen": 34181736, + "step": 516 + }, + { + "epoch": 0.04829877849019516, + "loss": 1.8759956359863281, + "loss_ce": 0.00587841309607029, + "loss_iou": 0.796875, + "loss_num": 0.055419921875, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 34181736, + "step": 516 + }, + { + "epoch": 0.04839238077409089, + "grad_norm": 18.406572341918945, + "learning_rate": 5e-05, + "loss": 1.9696, + "num_input_tokens_seen": 34247268, + "step": 517 + }, + { + "epoch": 0.04839238077409089, + "loss": 2.1460466384887695, + "loss_ce": 0.0112810879945755, + "loss_iou": 0.828125, + "loss_num": 0.09619140625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 34247268, + "step": 517 + }, + { + "epoch": 0.04848598305798661, + "grad_norm": 13.858189582824707, + "learning_rate": 5e-05, + "loss": 2.0945, + "num_input_tokens_seen": 34313260, + "step": 518 + }, + { + "epoch": 0.04848598305798661, + "loss": 2.029033899307251, + "loss_ce": 0.0016901454655453563, + "loss_iou": 0.8359375, + "loss_num": 0.0712890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 34313260, + "step": 518 + }, + { + "epoch": 0.04857958534188234, + "grad_norm": 24.578332901000977, + "learning_rate": 5e-05, + "loss": 2.0907, + "num_input_tokens_seen": 34379356, + "step": 519 + }, + { + "epoch": 0.04857958534188234, + "loss": 2.06921124458313, + "loss_ce": 0.010617459192872047, + "loss_iou": 0.859375, + "loss_num": 0.0673828125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 34379356, + "step": 519 + }, + { + "epoch": 0.04867318762577807, + "grad_norm": 15.262187004089355, + "learning_rate": 5e-05, + "loss": 2.2961, + "num_input_tokens_seen": 34445688, + "step": 520 + }, + { + "epoch": 0.04867318762577807, + "loss": 2.418579578399658, + "loss_ce": 0.004517007619142532, + "loss_iou": 0.97265625, + "loss_num": 0.09326171875, + "loss_xval": 2.40625, + "num_input_tokens_seen": 34445688, + "step": 520 + }, + { + "epoch": 0.0487667899096738, + "grad_norm": 50.165802001953125, + "learning_rate": 5e-05, + "loss": 1.9634, + "num_input_tokens_seen": 34512316, + "step": 521 + }, + { + "epoch": 0.0487667899096738, + "loss": 2.0381336212158203, + "loss_ce": 0.005907262675464153, + "loss_iou": 0.84765625, + "loss_num": 0.068359375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 34512316, + "step": 521 + }, + { + "epoch": 0.048860392193569524, + "grad_norm": 35.08852005004883, + "learning_rate": 5e-05, + "loss": 2.096, + "num_input_tokens_seen": 34579448, + "step": 522 + }, + { + "epoch": 0.048860392193569524, + "loss": 2.0467257499694824, + "loss_ce": 0.005710337311029434, + "loss_iou": 0.84765625, + "loss_num": 0.06884765625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 34579448, + "step": 522 + }, + { + "epoch": 0.04895399447746525, + "grad_norm": 11.056382179260254, + "learning_rate": 5e-05, + "loss": 2.4524, + "num_input_tokens_seen": 34645752, + "step": 523 + }, + { + "epoch": 0.04895399447746525, + "loss": 2.5007433891296387, + "loss_ce": 0.006602696608752012, + "loss_iou": 1.0390625, + "loss_num": 0.08251953125, + "loss_xval": 2.5, + "num_input_tokens_seen": 34645752, + "step": 523 + }, + { + "epoch": 0.04904759676136098, + "grad_norm": 8.489731788635254, + "learning_rate": 5e-05, + "loss": 2.5202, + "num_input_tokens_seen": 34711724, + "step": 524 + }, + { + "epoch": 0.04904759676136098, + "loss": 2.4595420360565186, + "loss_ce": 0.008370120078325272, + "loss_iou": 1.0390625, + "loss_num": 0.07373046875, + "loss_xval": 2.453125, + "num_input_tokens_seen": 34711724, + "step": 524 + }, + { + "epoch": 0.049141199045256705, + "grad_norm": 15.339666366577148, + "learning_rate": 5e-05, + "loss": 2.3298, + "num_input_tokens_seen": 34778112, + "step": 525 + }, + { + "epoch": 0.049141199045256705, + "loss": 2.4460458755493164, + "loss_ce": 0.0065928734838962555, + "loss_iou": 1.0078125, + "loss_num": 0.08447265625, + "loss_xval": 2.4375, + "num_input_tokens_seen": 34778112, + "step": 525 + }, + { + "epoch": 0.04923480132915243, + "grad_norm": 13.85021686553955, + "learning_rate": 5e-05, + "loss": 2.0732, + "num_input_tokens_seen": 34845392, + "step": 526 + }, + { + "epoch": 0.04923480132915243, + "loss": 2.011549234390259, + "loss_ce": 0.006666369736194611, + "loss_iou": 0.890625, + "loss_num": 0.044189453125, + "loss_xval": 2.0, + "num_input_tokens_seen": 34845392, + "step": 526 + }, + { + "epoch": 0.04932840361304816, + "grad_norm": 19.560449600219727, + "learning_rate": 5e-05, + "loss": 1.7883, + "num_input_tokens_seen": 34911200, + "step": 527 + }, + { + "epoch": 0.04932840361304816, + "loss": 1.6927893161773682, + "loss_ce": 0.004434742033481598, + "loss_iou": 0.68359375, + "loss_num": 0.064453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 34911200, + "step": 527 + }, + { + "epoch": 0.049422005896943885, + "grad_norm": 32.109779357910156, + "learning_rate": 5e-05, + "loss": 1.9413, + "num_input_tokens_seen": 34977464, + "step": 528 + }, + { + "epoch": 0.049422005896943885, + "loss": 1.9414345026016235, + "loss_ce": 0.0019814781844615936, + "loss_iou": 0.81640625, + "loss_num": 0.062255859375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 34977464, + "step": 528 + }, + { + "epoch": 0.04951560818083961, + "grad_norm": 12.003063201904297, + "learning_rate": 5e-05, + "loss": 2.1411, + "num_input_tokens_seen": 35043968, + "step": 529 + }, + { + "epoch": 0.04951560818083961, + "loss": 2.186443328857422, + "loss_ce": 0.004802762530744076, + "loss_iou": 0.9375, + "loss_num": 0.061767578125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 35043968, + "step": 529 + }, + { + "epoch": 0.04960921046473534, + "grad_norm": 13.4691743850708, + "learning_rate": 5e-05, + "loss": 2.0753, + "num_input_tokens_seen": 35110340, + "step": 530 + }, + { + "epoch": 0.04960921046473534, + "loss": 2.179971218109131, + "loss_ce": 0.008096389472484589, + "loss_iou": 0.890625, + "loss_num": 0.07861328125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 35110340, + "step": 530 + }, + { + "epoch": 0.049702812748631066, + "grad_norm": 23.27361488342285, + "learning_rate": 5e-05, + "loss": 1.9764, + "num_input_tokens_seen": 35175964, + "step": 531 + }, + { + "epoch": 0.049702812748631066, + "loss": 2.1452090740203857, + "loss_ce": 0.006537298671901226, + "loss_iou": 0.93359375, + "loss_num": 0.0537109375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 35175964, + "step": 531 + }, + { + "epoch": 0.04979641503252679, + "grad_norm": 12.105748176574707, + "learning_rate": 5e-05, + "loss": 1.9665, + "num_input_tokens_seen": 35241672, + "step": 532 + }, + { + "epoch": 0.04979641503252679, + "loss": 1.637925386428833, + "loss_ce": 0.006455630529671907, + "loss_iou": 0.67578125, + "loss_num": 0.05615234375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 35241672, + "step": 532 + }, + { + "epoch": 0.04989001731642252, + "grad_norm": 15.676896095275879, + "learning_rate": 5e-05, + "loss": 2.0951, + "num_input_tokens_seen": 35307804, + "step": 533 + }, + { + "epoch": 0.04989001731642252, + "loss": 1.8774535655975342, + "loss_ce": 0.008312873542308807, + "loss_iou": 0.7578125, + "loss_num": 0.0712890625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 35307804, + "step": 533 + }, + { + "epoch": 0.049983619600318246, + "grad_norm": 20.40330696105957, + "learning_rate": 5e-05, + "loss": 1.9254, + "num_input_tokens_seen": 35373448, + "step": 534 + }, + { + "epoch": 0.049983619600318246, + "loss": 2.0857203006744385, + "loss_ce": 0.0075952280312776566, + "loss_iou": 0.84765625, + "loss_num": 0.0771484375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 35373448, + "step": 534 + }, + { + "epoch": 0.05007722188421398, + "grad_norm": 10.411561965942383, + "learning_rate": 5e-05, + "loss": 2.1476, + "num_input_tokens_seen": 35439568, + "step": 535 + }, + { + "epoch": 0.05007722188421398, + "loss": 2.1036837100982666, + "loss_ce": 0.008102689869701862, + "loss_iou": 0.86328125, + "loss_num": 0.07373046875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 35439568, + "step": 535 + }, + { + "epoch": 0.0501708241681097, + "grad_norm": 10.970884323120117, + "learning_rate": 5e-05, + "loss": 1.7214, + "num_input_tokens_seen": 35504736, + "step": 536 + }, + { + "epoch": 0.0501708241681097, + "loss": 1.7694377899169922, + "loss_ce": 0.007230841089040041, + "loss_iou": 0.6953125, + "loss_num": 0.07470703125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 35504736, + "step": 536 + }, + { + "epoch": 0.05026442645200543, + "grad_norm": 18.94941520690918, + "learning_rate": 5e-05, + "loss": 1.9199, + "num_input_tokens_seen": 35571456, + "step": 537 + }, + { + "epoch": 0.05026442645200543, + "loss": 1.9069634675979614, + "loss_ce": 0.00461967196315527, + "loss_iou": 0.8125, + "loss_num": 0.055419921875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 35571456, + "step": 537 + }, + { + "epoch": 0.05035802873590116, + "grad_norm": 19.750707626342773, + "learning_rate": 5e-05, + "loss": 2.3119, + "num_input_tokens_seen": 35638668, + "step": 538 + }, + { + "epoch": 0.05035802873590116, + "loss": 2.2893307209014893, + "loss_ce": 0.004174430388957262, + "loss_iou": 1.0, + "loss_num": 0.058349609375, + "loss_xval": 2.28125, + "num_input_tokens_seen": 35638668, + "step": 538 + }, + { + "epoch": 0.05045163101979688, + "grad_norm": 49.66666793823242, + "learning_rate": 5e-05, + "loss": 2.0795, + "num_input_tokens_seen": 35705296, + "step": 539 + }, + { + "epoch": 0.05045163101979688, + "loss": 2.171050548553467, + "loss_ce": 0.009917769581079483, + "loss_iou": 0.90625, + "loss_num": 0.0693359375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 35705296, + "step": 539 + }, + { + "epoch": 0.05054523330369261, + "grad_norm": 18.106250762939453, + "learning_rate": 5e-05, + "loss": 2.0437, + "num_input_tokens_seen": 35771804, + "step": 540 + }, + { + "epoch": 0.05054523330369261, + "loss": 2.156437397003174, + "loss_ce": 0.006046803202480078, + "loss_iou": 0.84765625, + "loss_num": 0.09130859375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 35771804, + "step": 540 + }, + { + "epoch": 0.05063883558758834, + "grad_norm": 14.011467933654785, + "learning_rate": 5e-05, + "loss": 2.1245, + "num_input_tokens_seen": 35838284, + "step": 541 + }, + { + "epoch": 0.05063883558758834, + "loss": 2.0323169231414795, + "loss_ce": 0.007902797311544418, + "loss_iou": 0.796875, + "loss_num": 0.0859375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 35838284, + "step": 541 + }, + { + "epoch": 0.050732437871484064, + "grad_norm": 41.99628829956055, + "learning_rate": 5e-05, + "loss": 1.9095, + "num_input_tokens_seen": 35905368, + "step": 542 + }, + { + "epoch": 0.050732437871484064, + "loss": 2.227463722229004, + "loss_ce": 0.008713909424841404, + "loss_iou": 0.9375, + "loss_num": 0.068359375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 35905368, + "step": 542 + }, + { + "epoch": 0.05082604015537979, + "grad_norm": 22.376237869262695, + "learning_rate": 5e-05, + "loss": 1.9594, + "num_input_tokens_seen": 35972180, + "step": 543 + }, + { + "epoch": 0.05082604015537979, + "loss": 1.9037367105484009, + "loss_ce": 0.005299141630530357, + "loss_iou": 0.80859375, + "loss_num": 0.0556640625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 35972180, + "step": 543 + }, + { + "epoch": 0.05091964243927552, + "grad_norm": 15.99255657196045, + "learning_rate": 5e-05, + "loss": 2.2139, + "num_input_tokens_seen": 36037984, + "step": 544 + }, + { + "epoch": 0.05091964243927552, + "loss": 2.317139148712158, + "loss_ce": 0.007568897679448128, + "loss_iou": 0.94140625, + "loss_num": 0.08544921875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 36037984, + "step": 544 + }, + { + "epoch": 0.051013244723171244, + "grad_norm": 10.45749282836914, + "learning_rate": 5e-05, + "loss": 1.9645, + "num_input_tokens_seen": 36105256, + "step": 545 + }, + { + "epoch": 0.051013244723171244, + "loss": 1.761710524559021, + "loss_ce": 0.005851097404956818, + "loss_iou": 0.75390625, + "loss_num": 0.050048828125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 36105256, + "step": 545 + }, + { + "epoch": 0.051106847007066976, + "grad_norm": 22.613332748413086, + "learning_rate": 5e-05, + "loss": 1.6618, + "num_input_tokens_seen": 36170560, + "step": 546 + }, + { + "epoch": 0.051106847007066976, + "loss": 1.6365282535552979, + "loss_ce": 0.006645415909588337, + "loss_iou": 0.6796875, + "loss_num": 0.0537109375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 36170560, + "step": 546 + }, + { + "epoch": 0.0512004492909627, + "grad_norm": 74.29531860351562, + "learning_rate": 5e-05, + "loss": 2.3012, + "num_input_tokens_seen": 36237660, + "step": 547 + }, + { + "epoch": 0.0512004492909627, + "loss": 2.348698854446411, + "loss_ce": 0.00787852331995964, + "loss_iou": 0.984375, + "loss_num": 0.07373046875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 36237660, + "step": 547 + }, + { + "epoch": 0.051294051574858425, + "grad_norm": 19.30687141418457, + "learning_rate": 5e-05, + "loss": 2.1144, + "num_input_tokens_seen": 36304992, + "step": 548 + }, + { + "epoch": 0.051294051574858425, + "loss": 2.1274046897888184, + "loss_ce": 0.004357866011559963, + "loss_iou": 0.89453125, + "loss_num": 0.06640625, + "loss_xval": 2.125, + "num_input_tokens_seen": 36304992, + "step": 548 + }, + { + "epoch": 0.051387653858754156, + "grad_norm": 47.999019622802734, + "learning_rate": 5e-05, + "loss": 2.1222, + "num_input_tokens_seen": 36370964, + "step": 549 + }, + { + "epoch": 0.051387653858754156, + "loss": 2.293400764465332, + "loss_ce": 0.00336168403737247, + "loss_iou": 0.96484375, + "loss_num": 0.072265625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 36370964, + "step": 549 + }, + { + "epoch": 0.05148125614264988, + "grad_norm": 12.98134994506836, + "learning_rate": 5e-05, + "loss": 2.174, + "num_input_tokens_seen": 36436144, + "step": 550 + }, + { + "epoch": 0.05148125614264988, + "loss": 2.130186080932617, + "loss_ce": 0.007139342837035656, + "loss_iou": 0.859375, + "loss_num": 0.07958984375, + "loss_xval": 2.125, + "num_input_tokens_seen": 36436144, + "step": 550 + }, + { + "epoch": 0.051574858426545606, + "grad_norm": 29.445837020874023, + "learning_rate": 5e-05, + "loss": 1.8647, + "num_input_tokens_seen": 36502144, + "step": 551 + }, + { + "epoch": 0.051574858426545606, + "loss": 1.7941217422485352, + "loss_ce": 0.006035890430212021, + "loss_iou": 0.7578125, + "loss_num": 0.054443359375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 36502144, + "step": 551 + }, + { + "epoch": 0.05166846071044134, + "grad_norm": 15.311250686645508, + "learning_rate": 5e-05, + "loss": 1.752, + "num_input_tokens_seen": 36568284, + "step": 552 + }, + { + "epoch": 0.05166846071044134, + "loss": 1.6321521997451782, + "loss_ce": 0.0071521904319524765, + "loss_iou": 0.62890625, + "loss_num": 0.0732421875, + "loss_xval": 1.625, + "num_input_tokens_seen": 36568284, + "step": 552 + }, + { + "epoch": 0.05176206299433706, + "grad_norm": 24.853679656982422, + "learning_rate": 5e-05, + "loss": 2.1207, + "num_input_tokens_seen": 36635384, + "step": 553 + }, + { + "epoch": 0.05176206299433706, + "loss": 1.975627064704895, + "loss_ce": 0.009806775487959385, + "loss_iou": 0.83203125, + "loss_num": 0.060546875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 36635384, + "step": 553 + }, + { + "epoch": 0.051855665278232786, + "grad_norm": 14.415247917175293, + "learning_rate": 5e-05, + "loss": 1.9491, + "num_input_tokens_seen": 36702264, + "step": 554 + }, + { + "epoch": 0.051855665278232786, + "loss": 2.1889944076538086, + "loss_ce": 0.007353785447776318, + "loss_iou": 0.92578125, + "loss_num": 0.06591796875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 36702264, + "step": 554 + }, + { + "epoch": 0.05194926756212852, + "grad_norm": 13.853069305419922, + "learning_rate": 5e-05, + "loss": 2.0479, + "num_input_tokens_seen": 36768768, + "step": 555 + }, + { + "epoch": 0.05194926756212852, + "loss": 1.9762237071990967, + "loss_ce": 0.005520680919289589, + "loss_iou": 0.81640625, + "loss_num": 0.06787109375, + "loss_xval": 1.96875, + "num_input_tokens_seen": 36768768, + "step": 555 + }, + { + "epoch": 0.05204286984602424, + "grad_norm": 8.91629695892334, + "learning_rate": 5e-05, + "loss": 1.8298, + "num_input_tokens_seen": 36834424, + "step": 556 + }, + { + "epoch": 0.05204286984602424, + "loss": 1.9549953937530518, + "loss_ce": 0.003823609557002783, + "loss_iou": 0.8046875, + "loss_num": 0.068359375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 36834424, + "step": 556 + }, + { + "epoch": 0.05213647212991997, + "grad_norm": 15.065163612365723, + "learning_rate": 5e-05, + "loss": 1.8397, + "num_input_tokens_seen": 36901232, + "step": 557 + }, + { + "epoch": 0.05213647212991997, + "loss": 1.8255698680877686, + "loss_ce": 0.008187083527445793, + "loss_iou": 0.69921875, + "loss_num": 0.0830078125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 36901232, + "step": 557 + }, + { + "epoch": 0.0522300744138157, + "grad_norm": 84.12255859375, + "learning_rate": 5e-05, + "loss": 2.0238, + "num_input_tokens_seen": 36967616, + "step": 558 + }, + { + "epoch": 0.0522300744138157, + "loss": 2.149418354034424, + "loss_ce": 0.009281693957746029, + "loss_iou": 0.8671875, + "loss_num": 0.08203125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 36967616, + "step": 558 + }, + { + "epoch": 0.05232367669771142, + "grad_norm": 11.452712059020996, + "learning_rate": 5e-05, + "loss": 2.4079, + "num_input_tokens_seen": 37034176, + "step": 559 + }, + { + "epoch": 0.05232367669771142, + "loss": 2.4366798400878906, + "loss_ce": 0.006015540100634098, + "loss_iou": 1.0078125, + "loss_num": 0.08154296875, + "loss_xval": 2.4375, + "num_input_tokens_seen": 37034176, + "step": 559 + }, + { + "epoch": 0.052417278981607154, + "grad_norm": 12.024419784545898, + "learning_rate": 5e-05, + "loss": 2.1908, + "num_input_tokens_seen": 37099836, + "step": 560 + }, + { + "epoch": 0.052417278981607154, + "loss": 2.415208578109741, + "loss_ce": 0.007005504798144102, + "loss_iou": 0.98046875, + "loss_num": 0.08935546875, + "loss_xval": 2.40625, + "num_input_tokens_seen": 37099836, + "step": 560 + }, + { + "epoch": 0.05251088126550288, + "grad_norm": 18.38412094116211, + "learning_rate": 5e-05, + "loss": 2.0019, + "num_input_tokens_seen": 37166192, + "step": 561 + }, + { + "epoch": 0.05251088126550288, + "loss": 2.0711967945098877, + "loss_ce": 0.006743601523339748, + "loss_iou": 0.84765625, + "loss_num": 0.0732421875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 37166192, + "step": 561 + }, + { + "epoch": 0.052604483549398603, + "grad_norm": 15.95351505279541, + "learning_rate": 5e-05, + "loss": 1.943, + "num_input_tokens_seen": 37232400, + "step": 562 + }, + { + "epoch": 0.052604483549398603, + "loss": 1.9301117658615112, + "loss_ce": 0.0053070904687047005, + "loss_iou": 0.7734375, + "loss_num": 0.07568359375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 37232400, + "step": 562 + }, + { + "epoch": 0.052698085833294335, + "grad_norm": 21.236230850219727, + "learning_rate": 5e-05, + "loss": 2.115, + "num_input_tokens_seen": 37300176, + "step": 563 + }, + { + "epoch": 0.052698085833294335, + "loss": 2.063028335571289, + "loss_ce": 0.009317463263869286, + "loss_iou": 0.85546875, + "loss_num": 0.06884765625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 37300176, + "step": 563 + }, + { + "epoch": 0.05279168811719006, + "grad_norm": 22.604427337646484, + "learning_rate": 5e-05, + "loss": 1.7916, + "num_input_tokens_seen": 37365768, + "step": 564 + }, + { + "epoch": 0.05279168811719006, + "loss": 1.671072006225586, + "loss_ce": 0.01677520014345646, + "loss_iou": 0.640625, + "loss_num": 0.0751953125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 37365768, + "step": 564 + }, + { + "epoch": 0.052885290401085784, + "grad_norm": 13.016477584838867, + "learning_rate": 5e-05, + "loss": 1.7893, + "num_input_tokens_seen": 37432448, + "step": 565 + }, + { + "epoch": 0.052885290401085784, + "loss": 1.833286166191101, + "loss_ce": 0.00516126211732626, + "loss_iou": 0.796875, + "loss_num": 0.047607421875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 37432448, + "step": 565 + }, + { + "epoch": 0.052978892684981516, + "grad_norm": 61.25498962402344, + "learning_rate": 5e-05, + "loss": 1.6456, + "num_input_tokens_seen": 37499932, + "step": 566 + }, + { + "epoch": 0.052978892684981516, + "loss": 1.5449293851852417, + "loss_ce": 0.00977318175137043, + "loss_iou": 0.66015625, + "loss_num": 0.04296875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 37499932, + "step": 566 + }, + { + "epoch": 0.05307249496887724, + "grad_norm": 27.738588333129883, + "learning_rate": 5e-05, + "loss": 1.9402, + "num_input_tokens_seen": 37565112, + "step": 567 + }, + { + "epoch": 0.05307249496887724, + "loss": 1.886468768119812, + "loss_ce": 0.0070742275565862656, + "loss_iou": 0.7109375, + "loss_num": 0.09130859375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 37565112, + "step": 567 + }, + { + "epoch": 0.053166097252772965, + "grad_norm": 18.664432525634766, + "learning_rate": 5e-05, + "loss": 1.83, + "num_input_tokens_seen": 37630628, + "step": 568 + }, + { + "epoch": 0.053166097252772965, + "loss": 1.7699365615844727, + "loss_ce": 0.0038233078084886074, + "loss_iou": 0.7265625, + "loss_num": 0.06298828125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 37630628, + "step": 568 + }, + { + "epoch": 0.053259699536668696, + "grad_norm": 31.280494689941406, + "learning_rate": 5e-05, + "loss": 2.035, + "num_input_tokens_seen": 37695876, + "step": 569 + }, + { + "epoch": 0.053259699536668696, + "loss": 1.9658114910125732, + "loss_ce": 0.004874029662460089, + "loss_iou": 0.859375, + "loss_num": 0.04833984375, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 37695876, + "step": 569 + }, + { + "epoch": 0.05335330182056442, + "grad_norm": 12.460577011108398, + "learning_rate": 5e-05, + "loss": 2.2548, + "num_input_tokens_seen": 37761960, + "step": 570 + }, + { + "epoch": 0.05335330182056442, + "loss": 2.080930709838867, + "loss_ce": 0.007688452955335379, + "loss_iou": 0.828125, + "loss_num": 0.08349609375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 37761960, + "step": 570 + }, + { + "epoch": 0.05344690410446015, + "grad_norm": 24.75887680053711, + "learning_rate": 5e-05, + "loss": 2.1912, + "num_input_tokens_seen": 37828200, + "step": 571 + }, + { + "epoch": 0.05344690410446015, + "loss": 2.2149462699890137, + "loss_ce": 0.006938466802239418, + "loss_iou": 0.8671875, + "loss_num": 0.09521484375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 37828200, + "step": 571 + }, + { + "epoch": 0.05354050638835588, + "grad_norm": 15.288697242736816, + "learning_rate": 5e-05, + "loss": 1.7935, + "num_input_tokens_seen": 37894312, + "step": 572 + }, + { + "epoch": 0.05354050638835588, + "loss": 1.7205119132995605, + "loss_ce": 0.005424095317721367, + "loss_iou": 0.71484375, + "loss_num": 0.057373046875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 37894312, + "step": 572 + }, + { + "epoch": 0.0536341086722516, + "grad_norm": 16.898698806762695, + "learning_rate": 5e-05, + "loss": 1.9252, + "num_input_tokens_seen": 37962084, + "step": 573 + }, + { + "epoch": 0.0536341086722516, + "loss": 2.0075607299804688, + "loss_ce": 0.0026777826715260744, + "loss_iou": 0.88671875, + "loss_num": 0.0458984375, + "loss_xval": 2.0, + "num_input_tokens_seen": 37962084, + "step": 573 + }, + { + "epoch": 0.05372771095614733, + "grad_norm": 29.335344314575195, + "learning_rate": 5e-05, + "loss": 1.9544, + "num_input_tokens_seen": 38028544, + "step": 574 + }, + { + "epoch": 0.05372771095614733, + "loss": 1.9636216163635254, + "loss_ce": 0.004637310281395912, + "loss_iou": 0.8359375, + "loss_num": 0.05810546875, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 38028544, + "step": 574 + }, + { + "epoch": 0.05382131324004306, + "grad_norm": 13.343282699584961, + "learning_rate": 5e-05, + "loss": 2.1594, + "num_input_tokens_seen": 38093708, + "step": 575 + }, + { + "epoch": 0.05382131324004306, + "loss": 2.0714712142944336, + "loss_ce": 0.008971377275884151, + "loss_iou": 0.828125, + "loss_num": 0.0810546875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 38093708, + "step": 575 + }, + { + "epoch": 0.05391491552393878, + "grad_norm": 28.67262840270996, + "learning_rate": 5e-05, + "loss": 1.8941, + "num_input_tokens_seen": 38159792, + "step": 576 + }, + { + "epoch": 0.05391491552393878, + "loss": 1.9878249168395996, + "loss_ce": 0.006379695143550634, + "loss_iou": 0.796875, + "loss_num": 0.07861328125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 38159792, + "step": 576 + }, + { + "epoch": 0.054008517807834514, + "grad_norm": 15.381333351135254, + "learning_rate": 5e-05, + "loss": 1.9251, + "num_input_tokens_seen": 38226032, + "step": 577 + }, + { + "epoch": 0.054008517807834514, + "loss": 1.8523845672607422, + "loss_ce": 0.006681526079773903, + "loss_iou": 0.76171875, + "loss_num": 0.064453125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 38226032, + "step": 577 + }, + { + "epoch": 0.05410212009173024, + "grad_norm": 26.613903045654297, + "learning_rate": 5e-05, + "loss": 2.0962, + "num_input_tokens_seen": 38292808, + "step": 578 + }, + { + "epoch": 0.05410212009173024, + "loss": 2.060774803161621, + "loss_ce": 0.007063917815685272, + "loss_iou": 0.88671875, + "loss_num": 0.05615234375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 38292808, + "step": 578 + }, + { + "epoch": 0.05419572237562596, + "grad_norm": 21.56264877319336, + "learning_rate": 5e-05, + "loss": 2.2777, + "num_input_tokens_seen": 38359224, + "step": 579 + }, + { + "epoch": 0.05419572237562596, + "loss": 2.1975607872009277, + "loss_ce": 0.007131216116249561, + "loss_iou": 0.9140625, + "loss_num": 0.07177734375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 38359224, + "step": 579 + }, + { + "epoch": 0.054289324659521694, + "grad_norm": 12.033671379089355, + "learning_rate": 5e-05, + "loss": 2.2074, + "num_input_tokens_seen": 38425232, + "step": 580 + }, + { + "epoch": 0.054289324659521694, + "loss": 2.2009353637695312, + "loss_ce": 0.007575890514999628, + "loss_iou": 0.921875, + "loss_num": 0.0703125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 38425232, + "step": 580 + }, + { + "epoch": 0.05438292694341742, + "grad_norm": 16.650014877319336, + "learning_rate": 5e-05, + "loss": 1.7616, + "num_input_tokens_seen": 38491244, + "step": 581 + }, + { + "epoch": 0.05438292694341742, + "loss": 1.8445103168487549, + "loss_ce": 0.004666535183787346, + "loss_iou": 0.76953125, + "loss_num": 0.059814453125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 38491244, + "step": 581 + }, + { + "epoch": 0.05447652922731314, + "grad_norm": 32.606571197509766, + "learning_rate": 5e-05, + "loss": 1.7856, + "num_input_tokens_seen": 38557004, + "step": 582 + }, + { + "epoch": 0.05447652922731314, + "loss": 1.910442590713501, + "loss_ce": 0.004680905491113663, + "loss_iou": 0.734375, + "loss_num": 0.087890625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 38557004, + "step": 582 + }, + { + "epoch": 0.054570131511208875, + "grad_norm": 17.818336486816406, + "learning_rate": 5e-05, + "loss": 1.8917, + "num_input_tokens_seen": 38624396, + "step": 583 + }, + { + "epoch": 0.054570131511208875, + "loss": 1.8677129745483398, + "loss_ce": 0.005408269818872213, + "loss_iou": 0.796875, + "loss_num": 0.053955078125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 38624396, + "step": 583 + }, + { + "epoch": 0.0546637337951046, + "grad_norm": 20.405603408813477, + "learning_rate": 5e-05, + "loss": 2.127, + "num_input_tokens_seen": 38690540, + "step": 584 + }, + { + "epoch": 0.0546637337951046, + "loss": 1.979243278503418, + "loss_ce": 0.009516758844256401, + "loss_iou": 0.8359375, + "loss_num": 0.059814453125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 38690540, + "step": 584 + }, + { + "epoch": 0.05475733607900033, + "grad_norm": 26.222898483276367, + "learning_rate": 5e-05, + "loss": 2.591, + "num_input_tokens_seen": 38756172, + "step": 585 + }, + { + "epoch": 0.05475733607900033, + "loss": 2.757906913757324, + "loss_ce": 0.007906868122518063, + "loss_iou": 1.1328125, + "loss_num": 0.09765625, + "loss_xval": 2.75, + "num_input_tokens_seen": 38756172, + "step": 585 + }, + { + "epoch": 0.054850938362896055, + "grad_norm": 11.419878005981445, + "learning_rate": 5e-05, + "loss": 1.9198, + "num_input_tokens_seen": 38823404, + "step": 586 + }, + { + "epoch": 0.054850938362896055, + "loss": 1.9623419046401978, + "loss_ce": 0.00531068816781044, + "loss_iou": 0.80859375, + "loss_num": 0.0673828125, + "loss_xval": 1.953125, + "num_input_tokens_seen": 38823404, + "step": 586 + }, + { + "epoch": 0.05494454064679178, + "grad_norm": 28.394208908081055, + "learning_rate": 5e-05, + "loss": 1.7159, + "num_input_tokens_seen": 38890444, + "step": 587 + }, + { + "epoch": 0.05494454064679178, + "loss": 1.6631174087524414, + "loss_ce": 0.005402637645602226, + "loss_iou": 0.72265625, + "loss_num": 0.04248046875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 38890444, + "step": 587 + }, + { + "epoch": 0.05503814293068751, + "grad_norm": 18.359874725341797, + "learning_rate": 5e-05, + "loss": 2.1832, + "num_input_tokens_seen": 38957780, + "step": 588 + }, + { + "epoch": 0.05503814293068751, + "loss": 2.311920642852783, + "loss_ce": 0.005279937759041786, + "loss_iou": 0.9375, + "loss_num": 0.08642578125, + "loss_xval": 2.3125, + "num_input_tokens_seen": 38957780, + "step": 588 + }, + { + "epoch": 0.055131745214583236, + "grad_norm": 11.638762474060059, + "learning_rate": 5e-05, + "loss": 2.0758, + "num_input_tokens_seen": 39025052, + "step": 589 + }, + { + "epoch": 0.055131745214583236, + "loss": 2.085104465484619, + "loss_ce": 0.00697963684797287, + "loss_iou": 0.859375, + "loss_num": 0.07080078125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 39025052, + "step": 589 + }, + { + "epoch": 0.05522534749847896, + "grad_norm": 14.214030265808105, + "learning_rate": 5e-05, + "loss": 1.7909, + "num_input_tokens_seen": 39091696, + "step": 590 + }, + { + "epoch": 0.05522534749847896, + "loss": 1.8609280586242676, + "loss_ce": 0.0035061007365584373, + "loss_iou": 0.78515625, + "loss_num": 0.0576171875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 39091696, + "step": 590 + }, + { + "epoch": 0.05531894978237469, + "grad_norm": 51.06943130493164, + "learning_rate": 5e-05, + "loss": 2.0519, + "num_input_tokens_seen": 39158200, + "step": 591 + }, + { + "epoch": 0.05531894978237469, + "loss": 2.1332483291625977, + "loss_ce": 0.0053186360746622086, + "loss_iou": 0.90625, + "loss_num": 0.06396484375, + "loss_xval": 2.125, + "num_input_tokens_seen": 39158200, + "step": 591 + }, + { + "epoch": 0.05541255206627042, + "grad_norm": 41.73386764526367, + "learning_rate": 5e-05, + "loss": 2.1648, + "num_input_tokens_seen": 39223556, + "step": 592 + }, + { + "epoch": 0.05541255206627042, + "loss": 2.313202381134033, + "loss_ce": 0.006561813876032829, + "loss_iou": 0.984375, + "loss_num": 0.06787109375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 39223556, + "step": 592 + }, + { + "epoch": 0.05550615435016614, + "grad_norm": 14.052983283996582, + "learning_rate": 5e-05, + "loss": 2.2862, + "num_input_tokens_seen": 39289240, + "step": 593 + }, + { + "epoch": 0.05550615435016614, + "loss": 2.210081100463867, + "loss_ce": 0.005491130985319614, + "loss_iou": 0.8984375, + "loss_num": 0.08203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 39289240, + "step": 593 + }, + { + "epoch": 0.05559975663406187, + "grad_norm": 21.231733322143555, + "learning_rate": 5e-05, + "loss": 2.2757, + "num_input_tokens_seen": 39356272, + "step": 594 + }, + { + "epoch": 0.05559975663406187, + "loss": 2.239682674407959, + "loss_ce": 0.006284385919570923, + "loss_iou": 0.921875, + "loss_num": 0.07861328125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 39356272, + "step": 594 + }, + { + "epoch": 0.0556933589179576, + "grad_norm": 11.525460243225098, + "learning_rate": 5e-05, + "loss": 2.1444, + "num_input_tokens_seen": 39423392, + "step": 595 + }, + { + "epoch": 0.0556933589179576, + "loss": 2.2538256645202637, + "loss_ce": 0.005778972990810871, + "loss_iou": 0.90234375, + "loss_num": 0.08837890625, + "loss_xval": 2.25, + "num_input_tokens_seen": 39423392, + "step": 595 + }, + { + "epoch": 0.05578696120185333, + "grad_norm": 10.383962631225586, + "learning_rate": 5e-05, + "loss": 1.8663, + "num_input_tokens_seen": 39490420, + "step": 596 + }, + { + "epoch": 0.05578696120185333, + "loss": 1.938031792640686, + "loss_ce": 0.008344260044395924, + "loss_iou": 0.82421875, + "loss_num": 0.05615234375, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 39490420, + "step": 596 + }, + { + "epoch": 0.05588056348574905, + "grad_norm": 16.740312576293945, + "learning_rate": 5e-05, + "loss": 1.7252, + "num_input_tokens_seen": 39556180, + "step": 597 + }, + { + "epoch": 0.05588056348574905, + "loss": 1.7972288131713867, + "loss_ce": 0.005236555356532335, + "loss_iou": 0.73046875, + "loss_num": 0.0654296875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 39556180, + "step": 597 + }, + { + "epoch": 0.05597416576964478, + "grad_norm": 13.414369583129883, + "learning_rate": 5e-05, + "loss": 2.1265, + "num_input_tokens_seen": 39622388, + "step": 598 + }, + { + "epoch": 0.05597416576964478, + "loss": 1.9705634117126465, + "loss_ce": 0.004743036814033985, + "loss_iou": 0.859375, + "loss_num": 0.04931640625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 39622388, + "step": 598 + }, + { + "epoch": 0.05606776805354051, + "grad_norm": 25.500898361206055, + "learning_rate": 5e-05, + "loss": 1.7969, + "num_input_tokens_seen": 39687900, + "step": 599 + }, + { + "epoch": 0.05606776805354051, + "loss": 1.7924058437347412, + "loss_ce": 0.009202754124999046, + "loss_iou": 0.734375, + "loss_num": 0.062255859375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 39687900, + "step": 599 + }, + { + "epoch": 0.056161370337436234, + "grad_norm": 18.349210739135742, + "learning_rate": 5e-05, + "loss": 1.9016, + "num_input_tokens_seen": 39754580, + "step": 600 + }, + { + "epoch": 0.056161370337436234, + "loss": 2.0565357208251953, + "loss_ce": 0.004777735099196434, + "loss_iou": 0.8515625, + "loss_num": 0.06982421875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 39754580, + "step": 600 + }, + { + "epoch": 0.05625497262133196, + "grad_norm": 14.774345397949219, + "learning_rate": 5e-05, + "loss": 2.0181, + "num_input_tokens_seen": 39820300, + "step": 601 + }, + { + "epoch": 0.05625497262133196, + "loss": 1.978788137435913, + "loss_ce": 0.010038059204816818, + "loss_iou": 0.859375, + "loss_num": 0.04931640625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 39820300, + "step": 601 + }, + { + "epoch": 0.05634857490522769, + "grad_norm": 15.486387252807617, + "learning_rate": 5e-05, + "loss": 1.7805, + "num_input_tokens_seen": 39886928, + "step": 602 + }, + { + "epoch": 0.05634857490522769, + "loss": 1.693159580230713, + "loss_ce": 0.00395065825432539, + "loss_iou": 0.70703125, + "loss_num": 0.055419921875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 39886928, + "step": 602 + }, + { + "epoch": 0.056442177189123414, + "grad_norm": 41.67619705200195, + "learning_rate": 5e-05, + "loss": 1.9223, + "num_input_tokens_seen": 39953608, + "step": 603 + }, + { + "epoch": 0.056442177189123414, + "loss": 1.941701054573059, + "loss_ce": 0.0032246247865259647, + "loss_iou": 0.84765625, + "loss_num": 0.049072265625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 39953608, + "step": 603 + }, + { + "epoch": 0.05653577947301914, + "grad_norm": 10.798108100891113, + "learning_rate": 5e-05, + "loss": 2.2574, + "num_input_tokens_seen": 40019960, + "step": 604 + }, + { + "epoch": 0.05653577947301914, + "loss": 2.2095208168029785, + "loss_ce": 0.01420820876955986, + "loss_iou": 0.91015625, + "loss_num": 0.07470703125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 40019960, + "step": 604 + }, + { + "epoch": 0.05662938175691487, + "grad_norm": 13.647797584533691, + "learning_rate": 5e-05, + "loss": 2.004, + "num_input_tokens_seen": 40086084, + "step": 605 + }, + { + "epoch": 0.05662938175691487, + "loss": 2.0922958850860596, + "loss_ce": 0.004405309911817312, + "loss_iou": 0.88671875, + "loss_num": 0.0625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 40086084, + "step": 605 + }, + { + "epoch": 0.056722984040810595, + "grad_norm": 12.498695373535156, + "learning_rate": 5e-05, + "loss": 1.7111, + "num_input_tokens_seen": 40152652, + "step": 606 + }, + { + "epoch": 0.056722984040810595, + "loss": 1.5924022197723389, + "loss_ce": 0.0028025759384036064, + "loss_iou": 0.66796875, + "loss_num": 0.051025390625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 40152652, + "step": 606 + }, + { + "epoch": 0.05681658632470632, + "grad_norm": 20.511404037475586, + "learning_rate": 5e-05, + "loss": 1.8864, + "num_input_tokens_seen": 40218728, + "step": 607 + }, + { + "epoch": 0.05681658632470632, + "loss": 1.9277386665344238, + "loss_ce": 0.0039105541072785854, + "loss_iou": 0.83203125, + "loss_num": 0.052734375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 40218728, + "step": 607 + }, + { + "epoch": 0.05691018860860205, + "grad_norm": 13.575655937194824, + "learning_rate": 5e-05, + "loss": 1.6843, + "num_input_tokens_seen": 40284284, + "step": 608 + }, + { + "epoch": 0.05691018860860205, + "loss": 1.7808220386505127, + "loss_ce": 0.005614531692117453, + "loss_iou": 0.76953125, + "loss_num": 0.047119140625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 40284284, + "step": 608 + }, + { + "epoch": 0.057003790892497776, + "grad_norm": 39.97224807739258, + "learning_rate": 5e-05, + "loss": 2.0374, + "num_input_tokens_seen": 40349908, + "step": 609 + }, + { + "epoch": 0.057003790892497776, + "loss": 2.0007576942443848, + "loss_ce": 0.005640394054353237, + "loss_iou": 0.8359375, + "loss_num": 0.064453125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 40349908, + "step": 609 + }, + { + "epoch": 0.05709739317639351, + "grad_norm": 17.985727310180664, + "learning_rate": 5e-05, + "loss": 2.2559, + "num_input_tokens_seen": 40415248, + "step": 610 + }, + { + "epoch": 0.05709739317639351, + "loss": 2.138763427734375, + "loss_ce": 0.00790414959192276, + "loss_iou": 0.92578125, + "loss_num": 0.055419921875, + "loss_xval": 2.125, + "num_input_tokens_seen": 40415248, + "step": 610 + }, + { + "epoch": 0.05719099546028923, + "grad_norm": 15.21214485168457, + "learning_rate": 5e-05, + "loss": 2.0555, + "num_input_tokens_seen": 40480984, + "step": 611 + }, + { + "epoch": 0.05719099546028923, + "loss": 2.0109291076660156, + "loss_ce": 0.006046364549547434, + "loss_iou": 0.828125, + "loss_num": 0.06982421875, + "loss_xval": 2.0, + "num_input_tokens_seen": 40480984, + "step": 611 + }, + { + "epoch": 0.057284597744184956, + "grad_norm": 72.22217559814453, + "learning_rate": 5e-05, + "loss": 1.9259, + "num_input_tokens_seen": 40547572, + "step": 612 + }, + { + "epoch": 0.057284597744184956, + "loss": 1.910017490386963, + "loss_ce": 0.005720725283026695, + "loss_iou": 0.8359375, + "loss_num": 0.046875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 40547572, + "step": 612 + }, + { + "epoch": 0.05737820002808069, + "grad_norm": 14.137224197387695, + "learning_rate": 5e-05, + "loss": 2.3107, + "num_input_tokens_seen": 40614432, + "step": 613 + }, + { + "epoch": 0.05737820002808069, + "loss": 2.463726043701172, + "loss_ce": 0.004741773474961519, + "loss_iou": 1.03125, + "loss_num": 0.08056640625, + "loss_xval": 2.453125, + "num_input_tokens_seen": 40614432, + "step": 613 + }, + { + "epoch": 0.05747180231197641, + "grad_norm": 8.538642883300781, + "learning_rate": 5e-05, + "loss": 2.183, + "num_input_tokens_seen": 40680396, + "step": 614 + }, + { + "epoch": 0.05747180231197641, + "loss": 2.1091527938842773, + "loss_ce": 0.00392822315916419, + "loss_iou": 0.828125, + "loss_num": 0.0908203125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 40680396, + "step": 614 + }, + { + "epoch": 0.05756540459587214, + "grad_norm": 15.769079208374023, + "learning_rate": 5e-05, + "loss": 1.6501, + "num_input_tokens_seen": 40747076, + "step": 615 + }, + { + "epoch": 0.05756540459587214, + "loss": 1.5182501077651978, + "loss_ce": 0.007507950533181429, + "loss_iou": 0.6171875, + "loss_num": 0.05517578125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 40747076, + "step": 615 + }, + { + "epoch": 0.05765900687976787, + "grad_norm": 16.978391647338867, + "learning_rate": 5e-05, + "loss": 1.9324, + "num_input_tokens_seen": 40812900, + "step": 616 + }, + { + "epoch": 0.05765900687976787, + "loss": 1.905989646911621, + "loss_ce": 0.0021810298785567284, + "loss_iou": 0.76953125, + "loss_num": 0.07275390625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 40812900, + "step": 616 + }, + { + "epoch": 0.05775260916366359, + "grad_norm": 21.10886573791504, + "learning_rate": 5e-05, + "loss": 1.8902, + "num_input_tokens_seen": 40879000, + "step": 617 + }, + { + "epoch": 0.05775260916366359, + "loss": 1.7256581783294678, + "loss_ce": 0.009837780147790909, + "loss_iou": 0.734375, + "loss_num": 0.049072265625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 40879000, + "step": 617 + }, + { + "epoch": 0.05784621144755932, + "grad_norm": 41.98634338378906, + "learning_rate": 5e-05, + "loss": 2.1092, + "num_input_tokens_seen": 40945376, + "step": 618 + }, + { + "epoch": 0.05784621144755932, + "loss": 2.1062259674072266, + "loss_ce": 0.003686754498630762, + "loss_iou": 0.859375, + "loss_num": 0.076171875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 40945376, + "step": 618 + }, + { + "epoch": 0.05793981373145505, + "grad_norm": 37.540283203125, + "learning_rate": 5e-05, + "loss": 2.1491, + "num_input_tokens_seen": 41012208, + "step": 619 + }, + { + "epoch": 0.05793981373145505, + "loss": 2.153731346130371, + "loss_ce": 0.006270356476306915, + "loss_iou": 0.93359375, + "loss_num": 0.056640625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 41012208, + "step": 619 + }, + { + "epoch": 0.058033416015350774, + "grad_norm": 13.815699577331543, + "learning_rate": 5e-05, + "loss": 2.2309, + "num_input_tokens_seen": 41078144, + "step": 620 + }, + { + "epoch": 0.058033416015350774, + "loss": 2.1575770378112793, + "loss_ce": 0.0071863653138279915, + "loss_iou": 0.9140625, + "loss_num": 0.06494140625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 41078144, + "step": 620 + }, + { + "epoch": 0.0581270182992465, + "grad_norm": 12.515998840332031, + "learning_rate": 5e-05, + "loss": 1.9803, + "num_input_tokens_seen": 41145216, + "step": 621 + }, + { + "epoch": 0.0581270182992465, + "loss": 2.062647819519043, + "loss_ce": 0.003077602479606867, + "loss_iou": 0.8984375, + "loss_num": 0.05322265625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 41145216, + "step": 621 + }, + { + "epoch": 0.05822062058314223, + "grad_norm": 36.62022018432617, + "learning_rate": 5e-05, + "loss": 1.8181, + "num_input_tokens_seen": 41211472, + "step": 622 + }, + { + "epoch": 0.05822062058314223, + "loss": 1.7689921855926514, + "loss_ce": 0.006296772044152021, + "loss_iou": 0.734375, + "loss_num": 0.058837890625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 41211472, + "step": 622 + }, + { + "epoch": 0.058314222867037954, + "grad_norm": 27.370723724365234, + "learning_rate": 5e-05, + "loss": 1.8636, + "num_input_tokens_seen": 41279088, + "step": 623 + }, + { + "epoch": 0.058314222867037954, + "loss": 1.9225337505340576, + "loss_ce": 0.004564869683235884, + "loss_iou": 0.828125, + "loss_num": 0.05224609375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 41279088, + "step": 623 + }, + { + "epoch": 0.058407825150933686, + "grad_norm": 25.868284225463867, + "learning_rate": 5e-05, + "loss": 2.2106, + "num_input_tokens_seen": 41344980, + "step": 624 + }, + { + "epoch": 0.058407825150933686, + "loss": 2.377711296081543, + "loss_ce": 0.005641047842800617, + "loss_iou": 0.9921875, + "loss_num": 0.07861328125, + "loss_xval": 2.375, + "num_input_tokens_seen": 41344980, + "step": 624 + }, + { + "epoch": 0.05850142743482941, + "grad_norm": 13.196389198303223, + "learning_rate": 5e-05, + "loss": 1.9674, + "num_input_tokens_seen": 41411676, + "step": 625 + }, + { + "epoch": 0.05850142743482941, + "loss": 1.997706413269043, + "loss_ce": 0.0025892811827361584, + "loss_iou": 0.8515625, + "loss_num": 0.05908203125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 41411676, + "step": 625 + }, + { + "epoch": 0.058595029718725135, + "grad_norm": 9.63979721069336, + "learning_rate": 5e-05, + "loss": 1.7643, + "num_input_tokens_seen": 41477900, + "step": 626 + }, + { + "epoch": 0.058595029718725135, + "loss": 1.5383410453796387, + "loss_ce": 0.003428956028074026, + "loss_iou": 0.65625, + "loss_num": 0.044921875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 41477900, + "step": 626 + }, + { + "epoch": 0.058688632002620866, + "grad_norm": 18.528329849243164, + "learning_rate": 5e-05, + "loss": 1.583, + "num_input_tokens_seen": 41544016, + "step": 627 + }, + { + "epoch": 0.058688632002620866, + "loss": 1.6905781030654907, + "loss_ce": 0.007960943505167961, + "loss_iou": 0.703125, + "loss_num": 0.0556640625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 41544016, + "step": 627 + }, + { + "epoch": 0.05878223428651659, + "grad_norm": 23.660940170288086, + "learning_rate": 5e-05, + "loss": 2.2274, + "num_input_tokens_seen": 41610424, + "step": 628 + }, + { + "epoch": 0.05878223428651659, + "loss": 2.3859333992004395, + "loss_ce": 0.007027099374681711, + "loss_iou": 1.0546875, + "loss_num": 0.054931640625, + "loss_xval": 2.375, + "num_input_tokens_seen": 41610424, + "step": 628 + }, + { + "epoch": 0.058875836570412315, + "grad_norm": 12.01035213470459, + "learning_rate": 5e-05, + "loss": 1.9972, + "num_input_tokens_seen": 41675756, + "step": 629 + }, + { + "epoch": 0.058875836570412315, + "loss": 1.9726507663726807, + "loss_ce": 0.004877309314906597, + "loss_iou": 0.84765625, + "loss_num": 0.054931640625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 41675756, + "step": 629 + }, + { + "epoch": 0.05896943885430805, + "grad_norm": 12.94049072265625, + "learning_rate": 5e-05, + "loss": 1.9517, + "num_input_tokens_seen": 41742104, + "step": 630 + }, + { + "epoch": 0.05896943885430805, + "loss": 1.667283296585083, + "loss_ce": 0.004197373986244202, + "loss_iou": 0.72265625, + "loss_num": 0.044189453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 41742104, + "step": 630 + }, + { + "epoch": 0.05906304113820377, + "grad_norm": 11.238037109375, + "learning_rate": 5e-05, + "loss": 1.8699, + "num_input_tokens_seen": 41807904, + "step": 631 + }, + { + "epoch": 0.05906304113820377, + "loss": 1.8402600288391113, + "loss_ce": 0.006275675259530544, + "loss_iou": 0.8125, + "loss_num": 0.0419921875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 41807904, + "step": 631 + }, + { + "epoch": 0.059156643422099496, + "grad_norm": 25.944297790527344, + "learning_rate": 5e-05, + "loss": 1.9746, + "num_input_tokens_seen": 41874464, + "step": 632 + }, + { + "epoch": 0.059156643422099496, + "loss": 2.068049669265747, + "loss_ce": 0.004573073238134384, + "loss_iou": 0.8828125, + "loss_num": 0.059814453125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 41874464, + "step": 632 + }, + { + "epoch": 0.05925024570599523, + "grad_norm": 16.457990646362305, + "learning_rate": 5e-05, + "loss": 2.1437, + "num_input_tokens_seen": 41941224, + "step": 633 + }, + { + "epoch": 0.05925024570599523, + "loss": 2.3428397178649902, + "loss_ce": 0.006902330555021763, + "loss_iou": 1.0, + "loss_num": 0.06787109375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 41941224, + "step": 633 + }, + { + "epoch": 0.05934384798989095, + "grad_norm": 12.140624046325684, + "learning_rate": 5e-05, + "loss": 1.9177, + "num_input_tokens_seen": 42007972, + "step": 634 + }, + { + "epoch": 0.05934384798989095, + "loss": 1.873486042022705, + "loss_ce": 0.003368981881067157, + "loss_iou": 0.7734375, + "loss_num": 0.06494140625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 42007972, + "step": 634 + }, + { + "epoch": 0.059437450273786684, + "grad_norm": 27.34258270263672, + "learning_rate": 5e-05, + "loss": 1.9138, + "num_input_tokens_seen": 42074660, + "step": 635 + }, + { + "epoch": 0.059437450273786684, + "loss": 1.9723542928695679, + "loss_ce": 0.010440289974212646, + "loss_iou": 0.8046875, + "loss_num": 0.07080078125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 42074660, + "step": 635 + }, + { + "epoch": 0.05953105255768241, + "grad_norm": 11.294136047363281, + "learning_rate": 5e-05, + "loss": 2.4134, + "num_input_tokens_seen": 42141196, + "step": 636 + }, + { + "epoch": 0.05953105255768241, + "loss": 2.4120917320251465, + "loss_ce": 0.005841867998242378, + "loss_iou": 1.0390625, + "loss_num": 0.06689453125, + "loss_xval": 2.40625, + "num_input_tokens_seen": 42141196, + "step": 636 + }, + { + "epoch": 0.05962465484157813, + "grad_norm": 12.519861221313477, + "learning_rate": 5e-05, + "loss": 1.9338, + "num_input_tokens_seen": 42207028, + "step": 637 + }, + { + "epoch": 0.05962465484157813, + "loss": 1.930042028427124, + "loss_ce": 0.0037725483998656273, + "loss_iou": 0.7734375, + "loss_num": 0.076171875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 42207028, + "step": 637 + }, + { + "epoch": 0.059718257125473864, + "grad_norm": 14.694958686828613, + "learning_rate": 5e-05, + "loss": 1.8074, + "num_input_tokens_seen": 42273756, + "step": 638 + }, + { + "epoch": 0.059718257125473864, + "loss": 1.8845767974853516, + "loss_ce": 0.0037173698656260967, + "loss_iou": 0.7890625, + "loss_num": 0.060546875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 42273756, + "step": 638 + }, + { + "epoch": 0.05981185940936959, + "grad_norm": 30.116161346435547, + "learning_rate": 5e-05, + "loss": 1.8737, + "num_input_tokens_seen": 42339516, + "step": 639 + }, + { + "epoch": 0.05981185940936959, + "loss": 1.8496448993682861, + "loss_ce": 0.0029652677476406097, + "loss_iou": 0.7890625, + "loss_num": 0.05419921875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 42339516, + "step": 639 + }, + { + "epoch": 0.05990546169326531, + "grad_norm": 12.257732391357422, + "learning_rate": 5e-05, + "loss": 2.1148, + "num_input_tokens_seen": 42406456, + "step": 640 + }, + { + "epoch": 0.05990546169326531, + "loss": 2.2025976181030273, + "loss_ce": 0.0024022057186812162, + "loss_iou": 0.9375, + "loss_num": 0.0654296875, + "loss_xval": 2.203125, + "num_input_tokens_seen": 42406456, + "step": 640 + }, + { + "epoch": 0.059999063977161045, + "grad_norm": 10.195981979370117, + "learning_rate": 5e-05, + "loss": 2.0939, + "num_input_tokens_seen": 42473056, + "step": 641 + }, + { + "epoch": 0.059999063977161045, + "loss": 2.048806667327881, + "loss_ce": 0.005837919190526009, + "loss_iou": 0.82421875, + "loss_num": 0.07958984375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 42473056, + "step": 641 + }, + { + "epoch": 0.06009266626105677, + "grad_norm": 21.322961807250977, + "learning_rate": 5e-05, + "loss": 1.8452, + "num_input_tokens_seen": 42539812, + "step": 642 + }, + { + "epoch": 0.06009266626105677, + "loss": 1.949399709701538, + "loss_ce": 0.004087238572537899, + "loss_iou": 0.79296875, + "loss_num": 0.07275390625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 42539812, + "step": 642 + }, + { + "epoch": 0.060186268544952494, + "grad_norm": 63.77423095703125, + "learning_rate": 5e-05, + "loss": 1.7791, + "num_input_tokens_seen": 42607164, + "step": 643 + }, + { + "epoch": 0.060186268544952494, + "loss": 1.7822136878967285, + "loss_ce": 0.0029168049804866314, + "loss_iou": 0.7734375, + "loss_num": 0.047119140625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 42607164, + "step": 643 + }, + { + "epoch": 0.060279870828848225, + "grad_norm": 19.314645767211914, + "learning_rate": 5e-05, + "loss": 1.8867, + "num_input_tokens_seen": 42671928, + "step": 644 + }, + { + "epoch": 0.060279870828848225, + "loss": 1.986924171447754, + "loss_ce": 0.010361703112721443, + "loss_iou": 0.8359375, + "loss_num": 0.06005859375, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 42671928, + "step": 644 + }, + { + "epoch": 0.06037347311274395, + "grad_norm": 9.597721099853516, + "learning_rate": 5e-05, + "loss": 1.5429, + "num_input_tokens_seen": 42736592, + "step": 645 + }, + { + "epoch": 0.06037347311274395, + "loss": 1.4693222045898438, + "loss_ce": 0.004478428978472948, + "loss_iou": 0.58203125, + "loss_num": 0.060791015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 42736592, + "step": 645 + }, + { + "epoch": 0.060467075396639675, + "grad_norm": 8.470300674438477, + "learning_rate": 5e-05, + "loss": 1.6664, + "num_input_tokens_seen": 42802048, + "step": 646 + }, + { + "epoch": 0.060467075396639675, + "loss": 1.5450406074523926, + "loss_ce": 0.008907753974199295, + "loss_iou": 0.60546875, + "loss_num": 0.0654296875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 42802048, + "step": 646 + }, + { + "epoch": 0.060560677680535406, + "grad_norm": 25.71698570251465, + "learning_rate": 5e-05, + "loss": 1.6715, + "num_input_tokens_seen": 42868104, + "step": 647 + }, + { + "epoch": 0.060560677680535406, + "loss": 1.5138490200042725, + "loss_ce": 0.007501414977014065, + "loss_iou": 0.640625, + "loss_num": 0.0458984375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 42868104, + "step": 647 + }, + { + "epoch": 0.06065427996443113, + "grad_norm": 18.959789276123047, + "learning_rate": 5e-05, + "loss": 1.5311, + "num_input_tokens_seen": 42933812, + "step": 648 + }, + { + "epoch": 0.06065427996443113, + "loss": 1.1889536380767822, + "loss_ce": 0.004017080180346966, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 42933812, + "step": 648 + }, + { + "epoch": 0.06074788224832686, + "grad_norm": 22.170230865478516, + "learning_rate": 5e-05, + "loss": 1.8908, + "num_input_tokens_seen": 43001100, + "step": 649 + }, + { + "epoch": 0.06074788224832686, + "loss": 1.8376333713531494, + "loss_ce": 0.0026724031195044518, + "loss_iou": 0.8125, + "loss_num": 0.0419921875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 43001100, + "step": 649 + }, + { + "epoch": 0.06084148453222259, + "grad_norm": 46.66634750366211, + "learning_rate": 5e-05, + "loss": 2.23, + "num_input_tokens_seen": 43068388, + "step": 650 + }, + { + "epoch": 0.06084148453222259, + "loss": 2.2623658180236816, + "loss_ce": 0.005530000198632479, + "loss_iou": 0.92578125, + "loss_num": 0.08203125, + "loss_xval": 2.25, + "num_input_tokens_seen": 43068388, + "step": 650 + }, + { + "epoch": 0.06093508681611831, + "grad_norm": 18.600927352905273, + "learning_rate": 5e-05, + "loss": 1.8137, + "num_input_tokens_seen": 43135532, + "step": 651 + }, + { + "epoch": 0.06093508681611831, + "loss": 1.8081564903259277, + "loss_ce": 0.006398716941475868, + "loss_iou": 0.7734375, + "loss_num": 0.050048828125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 43135532, + "step": 651 + }, + { + "epoch": 0.06102868910001404, + "grad_norm": 21.627056121826172, + "learning_rate": 5e-05, + "loss": 1.7722, + "num_input_tokens_seen": 43201156, + "step": 652 + }, + { + "epoch": 0.06102868910001404, + "loss": 1.8300975561141968, + "loss_ce": 0.002949172630906105, + "loss_iou": 0.75, + "loss_num": 0.0654296875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 43201156, + "step": 652 + }, + { + "epoch": 0.06112229138390977, + "grad_norm": 18.532445907592773, + "learning_rate": 5e-05, + "loss": 1.7842, + "num_input_tokens_seen": 43267520, + "step": 653 + }, + { + "epoch": 0.06112229138390977, + "loss": 1.8301780223846436, + "loss_ce": 0.00498274527490139, + "loss_iou": 0.80078125, + "loss_num": 0.04443359375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 43267520, + "step": 653 + }, + { + "epoch": 0.06121589366780549, + "grad_norm": 26.2631893157959, + "learning_rate": 5e-05, + "loss": 1.7561, + "num_input_tokens_seen": 43334640, + "step": 654 + }, + { + "epoch": 0.06121589366780549, + "loss": 1.7084355354309082, + "loss_ce": 0.004333991091698408, + "loss_iou": 0.75, + "loss_num": 0.041748046875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 43334640, + "step": 654 + }, + { + "epoch": 0.06130949595170122, + "grad_norm": 18.518526077270508, + "learning_rate": 5e-05, + "loss": 2.4008, + "num_input_tokens_seen": 43400440, + "step": 655 + }, + { + "epoch": 0.06130949595170122, + "loss": 2.390713691711426, + "loss_ce": 0.004971439018845558, + "loss_iou": 1.03125, + "loss_num": 0.06396484375, + "loss_xval": 2.390625, + "num_input_tokens_seen": 43400440, + "step": 655 + }, + { + "epoch": 0.06140309823559695, + "grad_norm": 11.645074844360352, + "learning_rate": 5e-05, + "loss": 2.0933, + "num_input_tokens_seen": 43466484, + "step": 656 + }, + { + "epoch": 0.06140309823559695, + "loss": 2.1642441749572754, + "loss_ce": 0.017759662121534348, + "loss_iou": 0.88671875, + "loss_num": 0.07421875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 43466484, + "step": 656 + }, + { + "epoch": 0.06149670051949267, + "grad_norm": 11.377114295959473, + "learning_rate": 5e-05, + "loss": 1.7413, + "num_input_tokens_seen": 43532876, + "step": 657 + }, + { + "epoch": 0.06149670051949267, + "loss": 1.8291823863983154, + "loss_ce": 0.007893272675573826, + "loss_iou": 0.76953125, + "loss_num": 0.05615234375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 43532876, + "step": 657 + }, + { + "epoch": 0.061590302803388404, + "grad_norm": 19.197315216064453, + "learning_rate": 5e-05, + "loss": 1.8288, + "num_input_tokens_seen": 43598872, + "step": 658 + }, + { + "epoch": 0.061590302803388404, + "loss": 1.8198094367980957, + "loss_ce": 0.00828595645725727, + "loss_iou": 0.75, + "loss_num": 0.0625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 43598872, + "step": 658 + }, + { + "epoch": 0.06168390508728413, + "grad_norm": 19.02873420715332, + "learning_rate": 5e-05, + "loss": 2.044, + "num_input_tokens_seen": 43665696, + "step": 659 + }, + { + "epoch": 0.06168390508728413, + "loss": 2.211949586868286, + "loss_ce": 0.006871475838124752, + "loss_iou": 0.9140625, + "loss_num": 0.07568359375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 43665696, + "step": 659 + }, + { + "epoch": 0.06177750737117986, + "grad_norm": 11.751163482666016, + "learning_rate": 5e-05, + "loss": 1.9957, + "num_input_tokens_seen": 43731168, + "step": 660 + }, + { + "epoch": 0.06177750737117986, + "loss": 1.927664041519165, + "loss_ce": 0.006765660829842091, + "loss_iou": 0.78125, + "loss_num": 0.0712890625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 43731168, + "step": 660 + }, + { + "epoch": 0.061871109655075585, + "grad_norm": 13.9652099609375, + "learning_rate": 5e-05, + "loss": 1.7296, + "num_input_tokens_seen": 43797268, + "step": 661 + }, + { + "epoch": 0.061871109655075585, + "loss": 1.8963115215301514, + "loss_ce": 0.0066630351357162, + "loss_iou": 0.78515625, + "loss_num": 0.06396484375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 43797268, + "step": 661 + }, + { + "epoch": 0.06196471193897131, + "grad_norm": 21.194671630859375, + "learning_rate": 5e-05, + "loss": 1.7519, + "num_input_tokens_seen": 43864256, + "step": 662 + }, + { + "epoch": 0.06196471193897131, + "loss": 1.8214362859725952, + "loss_ce": 0.006006590090692043, + "loss_iou": 0.78125, + "loss_num": 0.05029296875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 43864256, + "step": 662 + }, + { + "epoch": 0.06205831422286704, + "grad_norm": 23.50273323059082, + "learning_rate": 5e-05, + "loss": 1.9001, + "num_input_tokens_seen": 43930312, + "step": 663 + }, + { + "epoch": 0.06205831422286704, + "loss": 2.0349676609039307, + "loss_ce": 0.0017646612832322717, + "loss_iou": 0.91015625, + "loss_num": 0.043212890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 43930312, + "step": 663 + }, + { + "epoch": 0.062151916506762765, + "grad_norm": 16.14592933654785, + "learning_rate": 5e-05, + "loss": 1.8992, + "num_input_tokens_seen": 43996108, + "step": 664 + }, + { + "epoch": 0.062151916506762765, + "loss": 1.7679128646850586, + "loss_ce": 0.006194200832396746, + "loss_iou": 0.76953125, + "loss_num": 0.0439453125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 43996108, + "step": 664 + }, + { + "epoch": 0.06224551879065849, + "grad_norm": 18.69837188720703, + "learning_rate": 5e-05, + "loss": 1.7899, + "num_input_tokens_seen": 44062072, + "step": 665 + }, + { + "epoch": 0.06224551879065849, + "loss": 1.9077510833740234, + "loss_ce": 0.009313540533185005, + "loss_iou": 0.79296875, + "loss_num": 0.06298828125, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 44062072, + "step": 665 + }, + { + "epoch": 0.06233912107455422, + "grad_norm": 24.879642486572266, + "learning_rate": 5e-05, + "loss": 2.2473, + "num_input_tokens_seen": 44129180, + "step": 666 + }, + { + "epoch": 0.06233912107455422, + "loss": 2.0507569313049316, + "loss_ce": 0.008764711208641529, + "loss_iou": 0.91015625, + "loss_num": 0.044677734375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 44129180, + "step": 666 + }, + { + "epoch": 0.062432723358449946, + "grad_norm": 13.696084976196289, + "learning_rate": 5e-05, + "loss": 1.8711, + "num_input_tokens_seen": 44193736, + "step": 667 + }, + { + "epoch": 0.062432723358449946, + "loss": 1.863105297088623, + "loss_ce": 0.0056832898408174515, + "loss_iou": 0.8046875, + "loss_num": 0.050537109375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 44193736, + "step": 667 + }, + { + "epoch": 0.06252632564234567, + "grad_norm": 13.253811836242676, + "learning_rate": 5e-05, + "loss": 1.8573, + "num_input_tokens_seen": 44259736, + "step": 668 + }, + { + "epoch": 0.06252632564234567, + "loss": 1.827341914176941, + "loss_ce": 0.004099718295037746, + "loss_iou": 0.77734375, + "loss_num": 0.05419921875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 44259736, + "step": 668 + }, + { + "epoch": 0.0626199279262414, + "grad_norm": 65.46825408935547, + "learning_rate": 5e-05, + "loss": 1.6073, + "num_input_tokens_seen": 44326656, + "step": 669 + }, + { + "epoch": 0.0626199279262414, + "loss": 1.829795002937317, + "loss_ce": 0.0036230827681720257, + "loss_iou": 0.8203125, + "loss_num": 0.0380859375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 44326656, + "step": 669 + }, + { + "epoch": 0.06271353021013713, + "grad_norm": 15.645062446594238, + "learning_rate": 5e-05, + "loss": 1.7469, + "num_input_tokens_seen": 44392964, + "step": 670 + }, + { + "epoch": 0.06271353021013713, + "loss": 1.6751956939697266, + "loss_ce": 0.009180095978081226, + "loss_iou": 0.6875, + "loss_num": 0.057861328125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 44392964, + "step": 670 + }, + { + "epoch": 0.06280713249403286, + "grad_norm": 21.835399627685547, + "learning_rate": 5e-05, + "loss": 1.7665, + "num_input_tokens_seen": 44457860, + "step": 671 + }, + { + "epoch": 0.06280713249403286, + "loss": 1.8617095947265625, + "loss_ce": 0.002334624994546175, + "loss_iou": 0.796875, + "loss_num": 0.053466796875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 44457860, + "step": 671 + }, + { + "epoch": 0.06290073477792858, + "grad_norm": 17.84896469116211, + "learning_rate": 5e-05, + "loss": 2.0174, + "num_input_tokens_seen": 44524384, + "step": 672 + }, + { + "epoch": 0.06290073477792858, + "loss": 1.902989387512207, + "loss_ce": 0.0035754013806581497, + "loss_iou": 0.8046875, + "loss_num": 0.05712890625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 44524384, + "step": 672 + }, + { + "epoch": 0.06299433706182431, + "grad_norm": 18.115175247192383, + "learning_rate": 5e-05, + "loss": 1.5983, + "num_input_tokens_seen": 44590172, + "step": 673 + }, + { + "epoch": 0.06299433706182431, + "loss": 1.6607317924499512, + "loss_ce": 0.0035052087623625994, + "loss_iou": 0.74609375, + "loss_num": 0.03271484375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 44590172, + "step": 673 + }, + { + "epoch": 0.06308793934572003, + "grad_norm": 21.669931411743164, + "learning_rate": 5e-05, + "loss": 2.185, + "num_input_tokens_seen": 44655864, + "step": 674 + }, + { + "epoch": 0.06308793934572003, + "loss": 2.141312599182129, + "loss_ce": 0.0026406331453472376, + "loss_iou": 0.8671875, + "loss_num": 0.07958984375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 44655864, + "step": 674 + }, + { + "epoch": 0.06318154162961576, + "grad_norm": 10.55396842956543, + "learning_rate": 5e-05, + "loss": 1.7654, + "num_input_tokens_seen": 44722456, + "step": 675 + }, + { + "epoch": 0.06318154162961576, + "loss": 1.8805890083312988, + "loss_ce": 0.00461235037073493, + "loss_iou": 0.84375, + "loss_num": 0.037353515625, + "loss_xval": 1.875, + "num_input_tokens_seen": 44722456, + "step": 675 + }, + { + "epoch": 0.0632751439135115, + "grad_norm": 13.018916130065918, + "learning_rate": 5e-05, + "loss": 1.6273, + "num_input_tokens_seen": 44789424, + "step": 676 + }, + { + "epoch": 0.0632751439135115, + "loss": 1.4644622802734375, + "loss_ce": 0.004989564418792725, + "loss_iou": 0.62890625, + "loss_num": 0.039794921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 44789424, + "step": 676 + }, + { + "epoch": 0.06336874619740722, + "grad_norm": 24.016740798950195, + "learning_rate": 5e-05, + "loss": 1.7852, + "num_input_tokens_seen": 44855256, + "step": 677 + }, + { + "epoch": 0.06336874619740722, + "loss": 1.588794469833374, + "loss_ce": 0.007739695720374584, + "loss_iou": 0.61328125, + "loss_num": 0.0703125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 44855256, + "step": 677 + }, + { + "epoch": 0.06346234848130294, + "grad_norm": 27.151763916015625, + "learning_rate": 5e-05, + "loss": 1.7278, + "num_input_tokens_seen": 44921840, + "step": 678 + }, + { + "epoch": 0.06346234848130294, + "loss": 1.693800687789917, + "loss_ce": 0.006300638429820538, + "loss_iou": 0.74609375, + "loss_num": 0.038818359375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 44921840, + "step": 678 + }, + { + "epoch": 0.06355595076519867, + "grad_norm": 10.849607467651367, + "learning_rate": 5e-05, + "loss": 2.1003, + "num_input_tokens_seen": 44987480, + "step": 679 + }, + { + "epoch": 0.06355595076519867, + "loss": 2.2084901332855225, + "loss_ce": 0.0024354765191674232, + "loss_iou": 0.97265625, + "loss_num": 0.052490234375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 44987480, + "step": 679 + }, + { + "epoch": 0.06364955304909439, + "grad_norm": 13.273422241210938, + "learning_rate": 5e-05, + "loss": 1.7229, + "num_input_tokens_seen": 45053144, + "step": 680 + }, + { + "epoch": 0.06364955304909439, + "loss": 1.7986541986465454, + "loss_ce": 0.0056854551658034325, + "loss_iou": 0.76171875, + "loss_num": 0.053466796875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 45053144, + "step": 680 + }, + { + "epoch": 0.06374315533299013, + "grad_norm": 20.408994674682617, + "learning_rate": 5e-05, + "loss": 1.6493, + "num_input_tokens_seen": 45119624, + "step": 681 + }, + { + "epoch": 0.06374315533299013, + "loss": 1.6919260025024414, + "loss_ce": 0.007355560548603535, + "loss_iou": 0.72265625, + "loss_num": 0.047607421875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 45119624, + "step": 681 + }, + { + "epoch": 0.06383675761688586, + "grad_norm": 13.204854011535645, + "learning_rate": 5e-05, + "loss": 1.5412, + "num_input_tokens_seen": 45186636, + "step": 682 + }, + { + "epoch": 0.06383675761688586, + "loss": 1.6893914937973022, + "loss_ce": 0.005797786638140678, + "loss_iou": 0.72265625, + "loss_num": 0.04736328125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 45186636, + "step": 682 + }, + { + "epoch": 0.06393035990078158, + "grad_norm": 20.79660415649414, + "learning_rate": 5e-05, + "loss": 2.1258, + "num_input_tokens_seen": 45251808, + "step": 683 + }, + { + "epoch": 0.06393035990078158, + "loss": 2.0212926864624023, + "loss_ce": 0.005667629651725292, + "loss_iou": 0.83984375, + "loss_num": 0.06689453125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 45251808, + "step": 683 + }, + { + "epoch": 0.0640239621846773, + "grad_norm": 16.694738388061523, + "learning_rate": 5e-05, + "loss": 1.8483, + "num_input_tokens_seen": 45318916, + "step": 684 + }, + { + "epoch": 0.0640239621846773, + "loss": 1.895155429840088, + "loss_ce": 0.0055069150403141975, + "loss_iou": 0.83984375, + "loss_num": 0.042236328125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 45318916, + "step": 684 + }, + { + "epoch": 0.06411756446857303, + "grad_norm": 14.720990180969238, + "learning_rate": 5e-05, + "loss": 1.6624, + "num_input_tokens_seen": 45385816, + "step": 685 + }, + { + "epoch": 0.06411756446857303, + "loss": 1.6102463006973267, + "loss_ce": 0.004777542781084776, + "loss_iou": 0.71875, + "loss_num": 0.034423828125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 45385816, + "step": 685 + }, + { + "epoch": 0.06421116675246875, + "grad_norm": 22.114540100097656, + "learning_rate": 5e-05, + "loss": 1.9662, + "num_input_tokens_seen": 45452036, + "step": 686 + }, + { + "epoch": 0.06421116675246875, + "loss": 1.892499566078186, + "loss_ce": 0.007733910344541073, + "loss_iou": 0.8359375, + "loss_num": 0.0419921875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 45452036, + "step": 686 + }, + { + "epoch": 0.06430476903636449, + "grad_norm": 22.834501266479492, + "learning_rate": 5e-05, + "loss": 1.6495, + "num_input_tokens_seen": 45517740, + "step": 687 + }, + { + "epoch": 0.06430476903636449, + "loss": 1.5179729461669922, + "loss_ce": 0.002347860485315323, + "loss_iou": 0.65234375, + "loss_num": 0.04248046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 45517740, + "step": 687 + }, + { + "epoch": 0.06439837132026022, + "grad_norm": 14.208182334899902, + "learning_rate": 5e-05, + "loss": 2.1564, + "num_input_tokens_seen": 45584060, + "step": 688 + }, + { + "epoch": 0.06439837132026022, + "loss": 2.085690975189209, + "loss_ce": 0.007565838750451803, + "loss_iou": 0.8828125, + "loss_num": 0.06298828125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 45584060, + "step": 688 + }, + { + "epoch": 0.06449197360415594, + "grad_norm": 16.036930084228516, + "learning_rate": 5e-05, + "loss": 1.8611, + "num_input_tokens_seen": 45649572, + "step": 689 + }, + { + "epoch": 0.06449197360415594, + "loss": 2.0573134422302246, + "loss_ce": 0.004579117055982351, + "loss_iou": 0.859375, + "loss_num": 0.06689453125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 45649572, + "step": 689 + }, + { + "epoch": 0.06458557588805167, + "grad_norm": 31.936649322509766, + "learning_rate": 5e-05, + "loss": 1.8204, + "num_input_tokens_seen": 45713996, + "step": 690 + }, + { + "epoch": 0.06458557588805167, + "loss": 1.8594334125518799, + "loss_ce": 0.0029881575610488653, + "loss_iou": 0.78125, + "loss_num": 0.057861328125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 45713996, + "step": 690 + }, + { + "epoch": 0.06467917817194739, + "grad_norm": 12.208847045898438, + "learning_rate": 5e-05, + "loss": 2.1596, + "num_input_tokens_seen": 45780704, + "step": 691 + }, + { + "epoch": 0.06467917817194739, + "loss": 2.1293883323669434, + "loss_ce": 0.008294601924717426, + "loss_iou": 0.953125, + "loss_num": 0.042236328125, + "loss_xval": 2.125, + "num_input_tokens_seen": 45780704, + "step": 691 + }, + { + "epoch": 0.06477278045584313, + "grad_norm": 13.552273750305176, + "learning_rate": 5e-05, + "loss": 1.9639, + "num_input_tokens_seen": 45846124, + "step": 692 + }, + { + "epoch": 0.06477278045584313, + "loss": 1.8553943634033203, + "loss_ce": 0.007738055661320686, + "loss_iou": 0.78125, + "loss_num": 0.057861328125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 45846124, + "step": 692 + }, + { + "epoch": 0.06486638273973885, + "grad_norm": 10.535679817199707, + "learning_rate": 5e-05, + "loss": 1.9231, + "num_input_tokens_seen": 45912244, + "step": 693 + }, + { + "epoch": 0.06486638273973885, + "loss": 1.6916513442993164, + "loss_ce": 0.002198208589106798, + "loss_iou": 0.73828125, + "loss_num": 0.041748046875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 45912244, + "step": 693 + }, + { + "epoch": 0.06495998502363458, + "grad_norm": 13.686797142028809, + "learning_rate": 5e-05, + "loss": 1.84, + "num_input_tokens_seen": 45978900, + "step": 694 + }, + { + "epoch": 0.06495998502363458, + "loss": 1.8427501916885376, + "loss_ce": 0.003883079392835498, + "loss_iou": 0.79296875, + "loss_num": 0.050537109375, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 45978900, + "step": 694 + }, + { + "epoch": 0.0650535873075303, + "grad_norm": 24.21961784362793, + "learning_rate": 5e-05, + "loss": 1.6559, + "num_input_tokens_seen": 46043936, + "step": 695 + }, + { + "epoch": 0.0650535873075303, + "loss": 1.4779677391052246, + "loss_ce": 0.0028701634146273136, + "loss_iou": 0.6171875, + "loss_num": 0.0478515625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 46043936, + "step": 695 + }, + { + "epoch": 0.06514718959142603, + "grad_norm": 18.41437339782715, + "learning_rate": 5e-05, + "loss": 1.8008, + "num_input_tokens_seen": 46110496, + "step": 696 + }, + { + "epoch": 0.06514718959142603, + "loss": 1.6277146339416504, + "loss_ce": 0.0012497918214648962, + "loss_iou": 0.73046875, + "loss_num": 0.032470703125, + "loss_xval": 1.625, + "num_input_tokens_seen": 46110496, + "step": 696 + }, + { + "epoch": 0.06524079187532175, + "grad_norm": 16.649349212646484, + "learning_rate": 5e-05, + "loss": 1.7843, + "num_input_tokens_seen": 46176756, + "step": 697 + }, + { + "epoch": 0.06524079187532175, + "loss": 1.8919517993927002, + "loss_ce": 0.0032798913307487965, + "loss_iou": 0.81640625, + "loss_num": 0.05126953125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 46176756, + "step": 697 + }, + { + "epoch": 0.06533439415921749, + "grad_norm": 38.61091613769531, + "learning_rate": 5e-05, + "loss": 1.7536, + "num_input_tokens_seen": 46242792, + "step": 698 + }, + { + "epoch": 0.06533439415921749, + "loss": 1.642912745475769, + "loss_ce": 0.004729137755930424, + "loss_iou": 0.73046875, + "loss_num": 0.034912109375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 46242792, + "step": 698 + }, + { + "epoch": 0.06542799644311321, + "grad_norm": 58.10789489746094, + "learning_rate": 5e-05, + "loss": 1.853, + "num_input_tokens_seen": 46307004, + "step": 699 + }, + { + "epoch": 0.06542799644311321, + "loss": 2.120300531387329, + "loss_ce": 0.006042658817023039, + "loss_iou": 0.86328125, + "loss_num": 0.0771484375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 46307004, + "step": 699 + }, + { + "epoch": 0.06552159872700894, + "grad_norm": 20.29197120666504, + "learning_rate": 5e-05, + "loss": 2.0745, + "num_input_tokens_seen": 46373032, + "step": 700 + }, + { + "epoch": 0.06552159872700894, + "loss": 2.0715878009796143, + "loss_ce": 0.004205000586807728, + "loss_iou": 0.8984375, + "loss_num": 0.05419921875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 46373032, + "step": 700 + }, + { + "epoch": 0.06561520101090466, + "grad_norm": 12.449002265930176, + "learning_rate": 5e-05, + "loss": 1.587, + "num_input_tokens_seen": 46438588, + "step": 701 + }, + { + "epoch": 0.06561520101090466, + "loss": 1.4610304832458496, + "loss_ce": 0.003511058399453759, + "loss_iou": 0.6484375, + "loss_num": 0.031982421875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 46438588, + "step": 701 + }, + { + "epoch": 0.06570880329480039, + "grad_norm": 10.84778881072998, + "learning_rate": 5e-05, + "loss": 1.6071, + "num_input_tokens_seen": 46505752, + "step": 702 + }, + { + "epoch": 0.06570880329480039, + "loss": 1.400804877281189, + "loss_ce": 0.004320591688156128, + "loss_iou": 0.625, + "loss_num": 0.030029296875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 46505752, + "step": 702 + }, + { + "epoch": 0.06580240557869613, + "grad_norm": 24.643667221069336, + "learning_rate": 5e-05, + "loss": 1.6514, + "num_input_tokens_seen": 46571916, + "step": 703 + }, + { + "epoch": 0.06580240557869613, + "loss": 1.572337031364441, + "loss_ce": 0.01179010234773159, + "loss_iou": 0.68359375, + "loss_num": 0.03857421875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 46571916, + "step": 703 + }, + { + "epoch": 0.06589600786259185, + "grad_norm": 15.583794593811035, + "learning_rate": 5e-05, + "loss": 2.0572, + "num_input_tokens_seen": 46638960, + "step": 704 + }, + { + "epoch": 0.06589600786259185, + "loss": 1.9588515758514404, + "loss_ce": 0.012562518008053303, + "loss_iou": 0.8671875, + "loss_num": 0.0419921875, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 46638960, + "step": 704 + }, + { + "epoch": 0.06598961014648758, + "grad_norm": 10.499127388000488, + "learning_rate": 5e-05, + "loss": 1.8242, + "num_input_tokens_seen": 46704448, + "step": 705 + }, + { + "epoch": 0.06598961014648758, + "loss": 1.8935816287994385, + "loss_ce": 0.003933214582502842, + "loss_iou": 0.7734375, + "loss_num": 0.068359375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 46704448, + "step": 705 + }, + { + "epoch": 0.0660832124303833, + "grad_norm": 24.402984619140625, + "learning_rate": 5e-05, + "loss": 1.6786, + "num_input_tokens_seen": 46770648, + "step": 706 + }, + { + "epoch": 0.0660832124303833, + "loss": 1.7661579847335815, + "loss_ce": 0.009383074007928371, + "loss_iou": 0.7265625, + "loss_num": 0.0615234375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 46770648, + "step": 706 + }, + { + "epoch": 0.06617681471427903, + "grad_norm": 16.042818069458008, + "learning_rate": 5e-05, + "loss": 1.7377, + "num_input_tokens_seen": 46836960, + "step": 707 + }, + { + "epoch": 0.06617681471427903, + "loss": 1.7936598062515259, + "loss_ce": 0.0055738468654453754, + "loss_iou": 0.78125, + "loss_num": 0.044921875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 46836960, + "step": 707 + }, + { + "epoch": 0.06627041699817475, + "grad_norm": 35.5173454284668, + "learning_rate": 5e-05, + "loss": 1.8091, + "num_input_tokens_seen": 46903948, + "step": 708 + }, + { + "epoch": 0.06627041699817475, + "loss": 2.017773151397705, + "loss_ce": 0.004101386293768883, + "loss_iou": 0.83984375, + "loss_num": 0.06640625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 46903948, + "step": 708 + }, + { + "epoch": 0.06636401928207049, + "grad_norm": 18.368568420410156, + "learning_rate": 5e-05, + "loss": 1.7415, + "num_input_tokens_seen": 46969168, + "step": 709 + }, + { + "epoch": 0.06636401928207049, + "loss": 1.5899379253387451, + "loss_ce": 0.0038172572385519743, + "loss_iou": 0.68359375, + "loss_num": 0.043212890625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 46969168, + "step": 709 + }, + { + "epoch": 0.06645762156596621, + "grad_norm": 34.08951950073242, + "learning_rate": 5e-05, + "loss": 1.7784, + "num_input_tokens_seen": 47035368, + "step": 710 + }, + { + "epoch": 0.06645762156596621, + "loss": 1.9146358966827393, + "loss_ce": 0.0035031517036259174, + "loss_iou": 0.84765625, + "loss_num": 0.04345703125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 47035368, + "step": 710 + }, + { + "epoch": 0.06655122384986194, + "grad_norm": 12.061261177062988, + "learning_rate": 5e-05, + "loss": 2.0622, + "num_input_tokens_seen": 47102132, + "step": 711 + }, + { + "epoch": 0.06655122384986194, + "loss": 1.9517844915390015, + "loss_ce": 0.0040306393057107925, + "loss_iou": 0.8203125, + "loss_num": 0.060791015625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 47102132, + "step": 711 + }, + { + "epoch": 0.06664482613375766, + "grad_norm": 25.1570987701416, + "learning_rate": 5e-05, + "loss": 1.7394, + "num_input_tokens_seen": 47167796, + "step": 712 + }, + { + "epoch": 0.06664482613375766, + "loss": 1.977304458618164, + "loss_ce": 0.00464824540540576, + "loss_iou": 0.82421875, + "loss_num": 0.06591796875, + "loss_xval": 1.96875, + "num_input_tokens_seen": 47167796, + "step": 712 + }, + { + "epoch": 0.06673842841765339, + "grad_norm": 25.616376876831055, + "learning_rate": 5e-05, + "loss": 1.3528, + "num_input_tokens_seen": 47233524, + "step": 713 + }, + { + "epoch": 0.06673842841765339, + "loss": 1.328101634979248, + "loss_ce": 0.010718777775764465, + "loss_iou": 0.51171875, + "loss_num": 0.058349609375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 47233524, + "step": 713 + }, + { + "epoch": 0.06683203070154911, + "grad_norm": 14.906170845031738, + "learning_rate": 5e-05, + "loss": 1.6391, + "num_input_tokens_seen": 47300436, + "step": 714 + }, + { + "epoch": 0.06683203070154911, + "loss": 1.5979814529418945, + "loss_ce": 0.004231479484587908, + "loss_iou": 0.7109375, + "loss_num": 0.034912109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 47300436, + "step": 714 + }, + { + "epoch": 0.06692563298544485, + "grad_norm": 30.154172897338867, + "learning_rate": 5e-05, + "loss": 1.8866, + "num_input_tokens_seen": 47366896, + "step": 715 + }, + { + "epoch": 0.06692563298544485, + "loss": 2.0287551879882812, + "loss_ce": 0.003364487085491419, + "loss_iou": 0.875, + "loss_num": 0.05419921875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 47366896, + "step": 715 + }, + { + "epoch": 0.06701923526934057, + "grad_norm": 21.635082244873047, + "learning_rate": 5e-05, + "loss": 1.8791, + "num_input_tokens_seen": 47433372, + "step": 716 + }, + { + "epoch": 0.06701923526934057, + "loss": 1.9303206205368042, + "loss_ce": 0.004539405927062035, + "loss_iou": 0.8046875, + "loss_num": 0.06298828125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 47433372, + "step": 716 + }, + { + "epoch": 0.0671128375532363, + "grad_norm": 26.324674606323242, + "learning_rate": 5e-05, + "loss": 1.6057, + "num_input_tokens_seen": 47499168, + "step": 717 + }, + { + "epoch": 0.0671128375532363, + "loss": 1.7501418590545654, + "loss_ce": 0.005024688318371773, + "loss_iou": 0.76953125, + "loss_num": 0.04150390625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 47499168, + "step": 717 + }, + { + "epoch": 0.06720643983713202, + "grad_norm": 10.50040054321289, + "learning_rate": 5e-05, + "loss": 2.0523, + "num_input_tokens_seen": 47564628, + "step": 718 + }, + { + "epoch": 0.06720643983713202, + "loss": 2.2206830978393555, + "loss_ce": 0.00876903161406517, + "loss_iou": 0.9765625, + "loss_num": 0.05126953125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 47564628, + "step": 718 + }, + { + "epoch": 0.06730004212102775, + "grad_norm": 31.882604598999023, + "learning_rate": 5e-05, + "loss": 1.8316, + "num_input_tokens_seen": 47631796, + "step": 719 + }, + { + "epoch": 0.06730004212102775, + "loss": 1.78173828125, + "loss_ce": 0.006347662769258022, + "loss_iou": 0.74609375, + "loss_num": 0.056640625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 47631796, + "step": 719 + }, + { + "epoch": 0.06739364440492349, + "grad_norm": 15.604974746704102, + "learning_rate": 5e-05, + "loss": 1.6485, + "num_input_tokens_seen": 47697144, + "step": 720 + }, + { + "epoch": 0.06739364440492349, + "loss": 1.6074495315551758, + "loss_ce": 0.008633635938167572, + "loss_iou": 0.6640625, + "loss_num": 0.053955078125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 47697144, + "step": 720 + }, + { + "epoch": 0.06748724668881921, + "grad_norm": 12.568963050842285, + "learning_rate": 5e-05, + "loss": 1.5024, + "num_input_tokens_seen": 47763272, + "step": 721 + }, + { + "epoch": 0.06748724668881921, + "loss": 1.5843994617462158, + "loss_ce": 0.002368141431361437, + "loss_iou": 0.71875, + "loss_num": 0.0289306640625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 47763272, + "step": 721 + }, + { + "epoch": 0.06758084897271494, + "grad_norm": 27.118867874145508, + "learning_rate": 5e-05, + "loss": 1.7845, + "num_input_tokens_seen": 47828356, + "step": 722 + }, + { + "epoch": 0.06758084897271494, + "loss": 1.8423880338668823, + "loss_ce": 0.0059622423723340034, + "loss_iou": 0.78515625, + "loss_num": 0.05322265625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 47828356, + "step": 722 + }, + { + "epoch": 0.06767445125661066, + "grad_norm": 17.231006622314453, + "learning_rate": 5e-05, + "loss": 2.0219, + "num_input_tokens_seen": 47895416, + "step": 723 + }, + { + "epoch": 0.06767445125661066, + "loss": 2.1317527294158936, + "loss_ce": 0.001869881758466363, + "loss_iou": 0.921875, + "loss_num": 0.05712890625, + "loss_xval": 2.125, + "num_input_tokens_seen": 47895416, + "step": 723 + }, + { + "epoch": 0.06776805354050638, + "grad_norm": 15.999128341674805, + "learning_rate": 5e-05, + "loss": 1.883, + "num_input_tokens_seen": 47961972, + "step": 724 + }, + { + "epoch": 0.06776805354050638, + "loss": 1.8662493228912354, + "loss_ce": 0.006874297745525837, + "loss_iou": 0.78125, + "loss_num": 0.05908203125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 47961972, + "step": 724 + }, + { + "epoch": 0.06786165582440211, + "grad_norm": 15.403785705566406, + "learning_rate": 5e-05, + "loss": 1.5873, + "num_input_tokens_seen": 48028404, + "step": 725 + }, + { + "epoch": 0.06786165582440211, + "loss": 1.6128623485565186, + "loss_ce": 0.0064169904217123985, + "loss_iou": 0.65625, + "loss_num": 0.058837890625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 48028404, + "step": 725 + }, + { + "epoch": 0.06795525810829785, + "grad_norm": 25.324594497680664, + "learning_rate": 5e-05, + "loss": 1.5274, + "num_input_tokens_seen": 48093988, + "step": 726 + }, + { + "epoch": 0.06795525810829785, + "loss": 1.6561145782470703, + "loss_ce": 0.006700488738715649, + "loss_iou": 0.71484375, + "loss_num": 0.044189453125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 48093988, + "step": 726 + }, + { + "epoch": 0.06804886039219357, + "grad_norm": 32.763973236083984, + "learning_rate": 5e-05, + "loss": 1.8417, + "num_input_tokens_seen": 48159828, + "step": 727 + }, + { + "epoch": 0.06804886039219357, + "loss": 1.7738572359085083, + "loss_ce": 0.005302562844008207, + "loss_iou": 0.7265625, + "loss_num": 0.0625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 48159828, + "step": 727 + }, + { + "epoch": 0.0681424626760893, + "grad_norm": 21.48606300354004, + "learning_rate": 5e-05, + "loss": 1.8958, + "num_input_tokens_seen": 48226332, + "step": 728 + }, + { + "epoch": 0.0681424626760893, + "loss": 1.9721516370773315, + "loss_ce": 0.0024250440765172243, + "loss_iou": 0.828125, + "loss_num": 0.0625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 48226332, + "step": 728 + }, + { + "epoch": 0.06823606495998502, + "grad_norm": 18.606369018554688, + "learning_rate": 5e-05, + "loss": 1.5867, + "num_input_tokens_seen": 48290772, + "step": 729 + }, + { + "epoch": 0.06823606495998502, + "loss": 1.4791977405548096, + "loss_ce": 0.006541408132761717, + "loss_iou": 0.65625, + "loss_num": 0.031494140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 48290772, + "step": 729 + }, + { + "epoch": 0.06832966724388075, + "grad_norm": 33.228878021240234, + "learning_rate": 5e-05, + "loss": 1.7367, + "num_input_tokens_seen": 48356540, + "step": 730 + }, + { + "epoch": 0.06832966724388075, + "loss": 1.8499488830566406, + "loss_ce": 0.0032692099921405315, + "loss_iou": 0.7734375, + "loss_num": 0.0595703125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 48356540, + "step": 730 + }, + { + "epoch": 0.06842326952777648, + "grad_norm": 11.724018096923828, + "learning_rate": 5e-05, + "loss": 1.6791, + "num_input_tokens_seen": 48423580, + "step": 731 + }, + { + "epoch": 0.06842326952777648, + "loss": 1.742972731590271, + "loss_ce": 0.0046915700659155846, + "loss_iou": 0.75, + "loss_num": 0.0478515625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 48423580, + "step": 731 + }, + { + "epoch": 0.06851687181167221, + "grad_norm": 16.34622573852539, + "learning_rate": 5e-05, + "loss": 1.734, + "num_input_tokens_seen": 48489440, + "step": 732 + }, + { + "epoch": 0.06851687181167221, + "loss": 1.684220790863037, + "loss_ce": 0.004533355124294758, + "loss_iou": 0.69921875, + "loss_num": 0.055419921875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 48489440, + "step": 732 + }, + { + "epoch": 0.06861047409556793, + "grad_norm": 16.87637710571289, + "learning_rate": 5e-05, + "loss": 1.8085, + "num_input_tokens_seen": 48555192, + "step": 733 + }, + { + "epoch": 0.06861047409556793, + "loss": 1.616333246231079, + "loss_ce": 0.005005074664950371, + "loss_iou": 0.6796875, + "loss_num": 0.05078125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 48555192, + "step": 733 + }, + { + "epoch": 0.06870407637946366, + "grad_norm": 28.047243118286133, + "learning_rate": 5e-05, + "loss": 1.6381, + "num_input_tokens_seen": 48621144, + "step": 734 + }, + { + "epoch": 0.06870407637946366, + "loss": 1.5632829666137695, + "loss_ce": 0.007618976291269064, + "loss_iou": 0.65234375, + "loss_num": 0.050537109375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 48621144, + "step": 734 + }, + { + "epoch": 0.06879767866335938, + "grad_norm": 11.642561912536621, + "learning_rate": 5e-05, + "loss": 2.2517, + "num_input_tokens_seen": 48687896, + "step": 735 + }, + { + "epoch": 0.06879767866335938, + "loss": 2.342609167098999, + "loss_ce": 0.001789049245417118, + "loss_iou": 0.984375, + "loss_num": 0.0751953125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 48687896, + "step": 735 + }, + { + "epoch": 0.0688912809472551, + "grad_norm": 16.74951171875, + "learning_rate": 5e-05, + "loss": 1.7637, + "num_input_tokens_seen": 48754184, + "step": 736 + }, + { + "epoch": 0.0688912809472551, + "loss": 1.548715353012085, + "loss_ce": 0.004159669391810894, + "loss_iou": 0.62890625, + "loss_num": 0.0576171875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 48754184, + "step": 736 + }, + { + "epoch": 0.06898488323115085, + "grad_norm": 18.810588836669922, + "learning_rate": 5e-05, + "loss": 1.7837, + "num_input_tokens_seen": 48819948, + "step": 737 + }, + { + "epoch": 0.06898488323115085, + "loss": 1.82712984085083, + "loss_ce": 0.004864188842475414, + "loss_iou": 0.7578125, + "loss_num": 0.060546875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 48819948, + "step": 737 + }, + { + "epoch": 0.06907848551504657, + "grad_norm": 16.910188674926758, + "learning_rate": 5e-05, + "loss": 1.7753, + "num_input_tokens_seen": 48885528, + "step": 738 + }, + { + "epoch": 0.06907848551504657, + "loss": 1.668896198272705, + "loss_ce": 0.0022703027352690697, + "loss_iou": 0.75, + "loss_num": 0.0341796875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 48885528, + "step": 738 + }, + { + "epoch": 0.0691720877989423, + "grad_norm": 14.250575065612793, + "learning_rate": 5e-05, + "loss": 1.9187, + "num_input_tokens_seen": 48952572, + "step": 739 + }, + { + "epoch": 0.0691720877989423, + "loss": 1.8930304050445557, + "loss_ce": 0.005823412910103798, + "loss_iou": 0.79296875, + "loss_num": 0.0595703125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 48952572, + "step": 739 + }, + { + "epoch": 0.06926569008283802, + "grad_norm": 35.549198150634766, + "learning_rate": 5e-05, + "loss": 1.7614, + "num_input_tokens_seen": 49019840, + "step": 740 + }, + { + "epoch": 0.06926569008283802, + "loss": 1.556145191192627, + "loss_ce": 0.005241925362497568, + "loss_iou": 0.6875, + "loss_num": 0.03466796875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 49019840, + "step": 740 + }, + { + "epoch": 0.06935929236673374, + "grad_norm": 14.610987663269043, + "learning_rate": 5e-05, + "loss": 2.45, + "num_input_tokens_seen": 49085132, + "step": 741 + }, + { + "epoch": 0.06935929236673374, + "loss": 2.3336081504821777, + "loss_ce": 0.003530060639604926, + "loss_iou": 0.9609375, + "loss_num": 0.08251953125, + "loss_xval": 2.328125, + "num_input_tokens_seen": 49085132, + "step": 741 + }, + { + "epoch": 0.06945289465062948, + "grad_norm": 11.338419914245605, + "learning_rate": 5e-05, + "loss": 1.7574, + "num_input_tokens_seen": 49150472, + "step": 742 + }, + { + "epoch": 0.06945289465062948, + "loss": 2.0119495391845703, + "loss_ce": 0.005113525316119194, + "loss_iou": 0.88671875, + "loss_num": 0.047119140625, + "loss_xval": 2.0, + "num_input_tokens_seen": 49150472, + "step": 742 + }, + { + "epoch": 0.0695464969345252, + "grad_norm": 16.81510353088379, + "learning_rate": 5e-05, + "loss": 1.7434, + "num_input_tokens_seen": 49217468, + "step": 743 + }, + { + "epoch": 0.0695464969345252, + "loss": 1.855400562286377, + "loss_ce": 0.005791161209344864, + "loss_iou": 0.84375, + "loss_num": 0.032470703125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 49217468, + "step": 743 + }, + { + "epoch": 0.06964009921842093, + "grad_norm": 19.771026611328125, + "learning_rate": 5e-05, + "loss": 1.3439, + "num_input_tokens_seen": 49283552, + "step": 744 + }, + { + "epoch": 0.06964009921842093, + "loss": 1.2848083972930908, + "loss_ce": 0.007952988147735596, + "loss_iou": 0.5625, + "loss_num": 0.03076171875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 49283552, + "step": 744 + }, + { + "epoch": 0.06973370150231666, + "grad_norm": 12.469643592834473, + "learning_rate": 5e-05, + "loss": 1.8382, + "num_input_tokens_seen": 49350048, + "step": 745 + }, + { + "epoch": 0.06973370150231666, + "loss": 2.0062437057495117, + "loss_ce": 0.003313942113891244, + "loss_iou": 0.89453125, + "loss_num": 0.04296875, + "loss_xval": 2.0, + "num_input_tokens_seen": 49350048, + "step": 745 + }, + { + "epoch": 0.06982730378621238, + "grad_norm": 17.502429962158203, + "learning_rate": 5e-05, + "loss": 1.7413, + "num_input_tokens_seen": 49416564, + "step": 746 + }, + { + "epoch": 0.06982730378621238, + "loss": 1.8351719379425049, + "loss_ce": 0.009976711124181747, + "loss_iou": 0.7578125, + "loss_num": 0.061279296875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 49416564, + "step": 746 + }, + { + "epoch": 0.0699209060701081, + "grad_norm": 27.377117156982422, + "learning_rate": 5e-05, + "loss": 1.6046, + "num_input_tokens_seen": 49482492, + "step": 747 + }, + { + "epoch": 0.0699209060701081, + "loss": 1.574857234954834, + "loss_ce": 0.007596466690301895, + "loss_iou": 0.66015625, + "loss_num": 0.048583984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 49482492, + "step": 747 + }, + { + "epoch": 0.07001450835400384, + "grad_norm": 15.532224655151367, + "learning_rate": 5e-05, + "loss": 1.882, + "num_input_tokens_seen": 49549728, + "step": 748 + }, + { + "epoch": 0.07001450835400384, + "loss": 1.9582514762878418, + "loss_ce": 0.01000935211777687, + "loss_iou": 0.875, + "loss_num": 0.03955078125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 49549728, + "step": 748 + }, + { + "epoch": 0.07010811063789957, + "grad_norm": 12.926255226135254, + "learning_rate": 5e-05, + "loss": 1.7915, + "num_input_tokens_seen": 49615236, + "step": 749 + }, + { + "epoch": 0.07010811063789957, + "loss": 1.8438667058944702, + "loss_ce": 0.009394025430083275, + "loss_iou": 0.7578125, + "loss_num": 0.0634765625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 49615236, + "step": 749 + }, + { + "epoch": 0.07020171292179529, + "grad_norm": 23.295557022094727, + "learning_rate": 5e-05, + "loss": 1.5518, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "eval_seeclick_CIoU": 0.0821551838889718, + "eval_seeclick_GIoU": 0.08433011546730995, + "eval_seeclick_IoU": 0.1993928849697113, + "eval_seeclick_MAE_all": 0.15256305783987045, + "eval_seeclick_MAE_h": 0.07523206993937492, + "eval_seeclick_MAE_w": 0.11818498000502586, + "eval_seeclick_MAE_x_boxes": 0.21669039130210876, + "eval_seeclick_MAE_y_boxes": 0.13354936987161636, + "eval_seeclick_NUM_probability": 0.997737854719162, + "eval_seeclick_inside_bbox": 0.2958333343267441, + "eval_seeclick_loss": 2.6831507682800293, + "eval_seeclick_loss_ce": 0.013645281083881855, + "eval_seeclick_loss_iou": 0.971923828125, + "eval_seeclick_loss_num": 0.1595611572265625, + "eval_seeclick_loss_xval": 2.7431640625, + "eval_seeclick_runtime": 64.8097, + "eval_seeclick_samples_per_second": 0.725, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "eval_icons_CIoU": -0.12176968157291412, + "eval_icons_GIoU": -0.05442662164568901, + "eval_icons_IoU": 0.038698263466358185, + "eval_icons_MAE_all": 0.16640260815620422, + "eval_icons_MAE_h": 0.13604643195867538, + "eval_icons_MAE_w": 0.22236677259206772, + "eval_icons_MAE_x_boxes": 0.07750638574361801, + "eval_icons_MAE_y_boxes": 0.09784968942403793, + "eval_icons_NUM_probability": 0.9974802136421204, + "eval_icons_inside_bbox": 0.04340277798473835, + "eval_icons_loss": 2.9876580238342285, + "eval_icons_loss_ce": 0.0013713959779124707, + "eval_icons_loss_iou": 1.064453125, + "eval_icons_loss_num": 0.1716156005859375, + "eval_icons_loss_xval": 2.990234375, + "eval_icons_runtime": 73.0627, + "eval_icons_samples_per_second": 0.684, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "eval_screenspot_CIoU": -0.01705087659259637, + "eval_screenspot_GIoU": -0.02666320465505123, + "eval_screenspot_IoU": 0.15526391565799713, + "eval_screenspot_MAE_all": 0.14108752955993017, + "eval_screenspot_MAE_h": 0.07705104599396388, + "eval_screenspot_MAE_w": 0.15436455110708872, + "eval_screenspot_MAE_x_boxes": 0.2198748638232549, + "eval_screenspot_MAE_y_boxes": 0.08775023867686589, + "eval_screenspot_NUM_probability": 0.9970801870028178, + "eval_screenspot_inside_bbox": 0.34916667143503827, + "eval_screenspot_loss": 2.797074794769287, + "eval_screenspot_loss_ce": 0.01522288036843141, + "eval_screenspot_loss_iou": 1.0514322916666667, + "eval_screenspot_loss_num": 0.147674560546875, + "eval_screenspot_loss_xval": 2.8414713541666665, + "eval_screenspot_runtime": 118.8564, + "eval_screenspot_samples_per_second": 0.749, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "eval_compot_CIoU": -0.040124996565282345, + "eval_compot_GIoU": -0.005084183067083359, + "eval_compot_IoU": 0.1061786413192749, + "eval_compot_MAE_all": 0.11273051053285599, + "eval_compot_MAE_h": 0.0688395258039236, + "eval_compot_MAE_w": 0.14000537246465683, + "eval_compot_MAE_x_boxes": 0.08593297004699707, + "eval_compot_MAE_y_boxes": 0.09216885641217232, + "eval_compot_NUM_probability": 0.996105432510376, + "eval_compot_inside_bbox": 0.1579861119389534, + "eval_compot_loss": 2.632723331451416, + "eval_compot_loss_ce": 0.0066622712183743715, + "eval_compot_loss_iou": 1.03125, + "eval_compot_loss_num": 0.11548614501953125, + "eval_compot_loss_xval": 2.63916015625, + "eval_compot_runtime": 70.3618, + "eval_compot_samples_per_second": 0.711, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "eval_custom_ui_MAE_all": 0.17065107077360153, + "eval_custom_ui_MAE_x": 0.13065431267023087, + "eval_custom_ui_MAE_y": 0.2106478363275528, + "eval_custom_ui_NUM_probability": 0.9943723678588867, + "eval_custom_ui_loss": 0.8370248675346375, + "eval_custom_ui_loss_ce": 0.03082600887864828, + "eval_custom_ui_loss_num": 0.163543701171875, + "eval_custom_ui_loss_xval": 0.8179931640625, + "eval_custom_ui_runtime": 55.5237, + "eval_custom_ui_samples_per_second": 0.901, + "eval_custom_ui_steps_per_second": 0.036, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07020171292179529, + "loss": 0.8694114685058594, + "loss_ce": 0.03200910612940788, + "loss_iou": 0.0, + "loss_num": 0.1669921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 49680536, + "step": 750 + }, + { + "epoch": 0.07029531520569102, + "grad_norm": 19.479524612426758, + "learning_rate": 5e-05, + "loss": 1.8365, + "num_input_tokens_seen": 49746644, + "step": 751 + }, + { + "epoch": 0.07029531520569102, + "loss": 1.8767595291137695, + "loss_ce": 0.002736043417826295, + "loss_iou": 0.828125, + "loss_num": 0.04345703125, + "loss_xval": 1.875, + "num_input_tokens_seen": 49746644, + "step": 751 + }, + { + "epoch": 0.07038891748958674, + "grad_norm": 14.468050956726074, + "learning_rate": 5e-05, + "loss": 1.5694, + "num_input_tokens_seen": 49814392, + "step": 752 + }, + { + "epoch": 0.07038891748958674, + "loss": 1.7327210903167725, + "loss_ce": 0.00322896521538496, + "loss_iou": 0.72265625, + "loss_num": 0.056884765625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 49814392, + "step": 752 + }, + { + "epoch": 0.07048251977348248, + "grad_norm": 26.36949920654297, + "learning_rate": 5e-05, + "loss": 1.8411, + "num_input_tokens_seen": 49880920, + "step": 753 + }, + { + "epoch": 0.07048251977348248, + "loss": 1.820433259010315, + "loss_ce": 0.005980121437460184, + "loss_iou": 0.8046875, + "loss_num": 0.041259765625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 49880920, + "step": 753 + }, + { + "epoch": 0.0705761220573782, + "grad_norm": 14.882464408874512, + "learning_rate": 5e-05, + "loss": 1.7455, + "num_input_tokens_seen": 49946800, + "step": 754 + }, + { + "epoch": 0.0705761220573782, + "loss": 1.517045259475708, + "loss_ce": 0.004350018221884966, + "loss_iou": 0.6328125, + "loss_num": 0.050048828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 49946800, + "step": 754 + }, + { + "epoch": 0.07066972434127393, + "grad_norm": 26.518571853637695, + "learning_rate": 5e-05, + "loss": 1.7098, + "num_input_tokens_seen": 50013044, + "step": 755 + }, + { + "epoch": 0.07066972434127393, + "loss": 1.873121738433838, + "loss_ce": 0.008863825350999832, + "loss_iou": 0.76171875, + "loss_num": 0.06787109375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 50013044, + "step": 755 + }, + { + "epoch": 0.07076332662516965, + "grad_norm": 30.003734588623047, + "learning_rate": 5e-05, + "loss": 1.8209, + "num_input_tokens_seen": 50079532, + "step": 756 + }, + { + "epoch": 0.07076332662516965, + "loss": 1.817097544670105, + "loss_ce": 0.0036210219841450453, + "loss_iou": 0.796875, + "loss_num": 0.043701171875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 50079532, + "step": 756 + }, + { + "epoch": 0.07085692890906538, + "grad_norm": 20.43360137939453, + "learning_rate": 5e-05, + "loss": 1.6892, + "num_input_tokens_seen": 50145584, + "step": 757 + }, + { + "epoch": 0.07085692890906538, + "loss": 1.3948793411254883, + "loss_ce": 0.0019349617650732398, + "loss_iou": 0.5625, + "loss_num": 0.05419921875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 50145584, + "step": 757 + }, + { + "epoch": 0.0709505311929611, + "grad_norm": 15.142757415771484, + "learning_rate": 5e-05, + "loss": 1.8305, + "num_input_tokens_seen": 50212980, + "step": 758 + }, + { + "epoch": 0.0709505311929611, + "loss": 1.6338109970092773, + "loss_ce": 0.005148960277438164, + "loss_iou": 0.6875, + "loss_num": 0.05078125, + "loss_xval": 1.625, + "num_input_tokens_seen": 50212980, + "step": 758 + }, + { + "epoch": 0.07104413347685684, + "grad_norm": 18.16373062133789, + "learning_rate": 5e-05, + "loss": 1.4583, + "num_input_tokens_seen": 50279488, + "step": 759 + }, + { + "epoch": 0.07104413347685684, + "loss": 1.4060198068618774, + "loss_ce": 0.008558880537748337, + "loss_iou": 0.6015625, + "loss_num": 0.038330078125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 50279488, + "step": 759 + }, + { + "epoch": 0.07113773576075257, + "grad_norm": 15.06195068359375, + "learning_rate": 5e-05, + "loss": 1.8918, + "num_input_tokens_seen": 50344944, + "step": 760 + }, + { + "epoch": 0.07113773576075257, + "loss": 1.9246017932891846, + "loss_ce": 0.006632945500314236, + "loss_iou": 0.84765625, + "loss_num": 0.044677734375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 50344944, + "step": 760 + }, + { + "epoch": 0.07123133804464829, + "grad_norm": 26.571983337402344, + "learning_rate": 5e-05, + "loss": 1.8377, + "num_input_tokens_seen": 50411756, + "step": 761 + }, + { + "epoch": 0.07123133804464829, + "loss": 1.8060612678527832, + "loss_ce": 0.010162830352783203, + "loss_iou": 0.765625, + "loss_num": 0.05322265625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 50411756, + "step": 761 + }, + { + "epoch": 0.07132494032854401, + "grad_norm": 16.121057510375977, + "learning_rate": 5e-05, + "loss": 1.6626, + "num_input_tokens_seen": 50478228, + "step": 762 + }, + { + "epoch": 0.07132494032854401, + "loss": 1.7193970680236816, + "loss_ce": 0.006506370846182108, + "loss_iou": 0.703125, + "loss_num": 0.061279296875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 50478228, + "step": 762 + }, + { + "epoch": 0.07141854261243974, + "grad_norm": 30.311779022216797, + "learning_rate": 5e-05, + "loss": 2.0037, + "num_input_tokens_seen": 50543592, + "step": 763 + }, + { + "epoch": 0.07141854261243974, + "loss": 1.9180309772491455, + "loss_ce": 0.00494496151804924, + "loss_iou": 0.8359375, + "loss_num": 0.0478515625, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 50543592, + "step": 763 + }, + { + "epoch": 0.07151214489633546, + "grad_norm": 33.463836669921875, + "learning_rate": 5e-05, + "loss": 2.0935, + "num_input_tokens_seen": 50610348, + "step": 764 + }, + { + "epoch": 0.07151214489633546, + "loss": 2.2562096118927, + "loss_ce": 0.0032799867913126945, + "loss_iou": 0.9296875, + "loss_num": 0.078125, + "loss_xval": 2.25, + "num_input_tokens_seen": 50610348, + "step": 764 + }, + { + "epoch": 0.0716057471802312, + "grad_norm": 16.288908004760742, + "learning_rate": 5e-05, + "loss": 1.7871, + "num_input_tokens_seen": 50677068, + "step": 765 + }, + { + "epoch": 0.0716057471802312, + "loss": 1.954931378364563, + "loss_ce": 0.007665722630918026, + "loss_iou": 0.796875, + "loss_num": 0.07177734375, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 50677068, + "step": 765 + }, + { + "epoch": 0.07169934946412693, + "grad_norm": 26.749717712402344, + "learning_rate": 5e-05, + "loss": 1.628, + "num_input_tokens_seen": 50743656, + "step": 766 + }, + { + "epoch": 0.07169934946412693, + "loss": 1.78788161277771, + "loss_ce": 0.00760816503316164, + "loss_iou": 0.74609375, + "loss_num": 0.05810546875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 50743656, + "step": 766 + }, + { + "epoch": 0.07179295174802265, + "grad_norm": 21.085527420043945, + "learning_rate": 5e-05, + "loss": 1.6886, + "num_input_tokens_seen": 50808832, + "step": 767 + }, + { + "epoch": 0.07179295174802265, + "loss": 1.9450939893722534, + "loss_ce": 0.005640930961817503, + "loss_iou": 0.79296875, + "loss_num": 0.07080078125, + "loss_xval": 1.9375, + "num_input_tokens_seen": 50808832, + "step": 767 + }, + { + "epoch": 0.07188655403191838, + "grad_norm": 27.9982852935791, + "learning_rate": 5e-05, + "loss": 1.7369, + "num_input_tokens_seen": 50875812, + "step": 768 + }, + { + "epoch": 0.07188655403191838, + "loss": 1.67847740650177, + "loss_ce": 0.0026961141265928745, + "loss_iou": 0.7421875, + "loss_num": 0.03857421875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 50875812, + "step": 768 + }, + { + "epoch": 0.0719801563158141, + "grad_norm": 15.898396492004395, + "learning_rate": 5e-05, + "loss": 2.0964, + "num_input_tokens_seen": 50941828, + "step": 769 + }, + { + "epoch": 0.0719801563158141, + "loss": 2.1420369148254395, + "loss_ce": 0.004341588821262121, + "loss_iou": 0.8984375, + "loss_num": 0.06884765625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 50941828, + "step": 769 + }, + { + "epoch": 0.07207375859970984, + "grad_norm": 18.860252380371094, + "learning_rate": 5e-05, + "loss": 1.7662, + "num_input_tokens_seen": 51008116, + "step": 770 + }, + { + "epoch": 0.07207375859970984, + "loss": 1.9062767028808594, + "loss_ce": 0.005153778474777937, + "loss_iou": 0.796875, + "loss_num": 0.061279296875, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 51008116, + "step": 770 + }, + { + "epoch": 0.07216736088360556, + "grad_norm": 23.90975570678711, + "learning_rate": 5e-05, + "loss": 1.7491, + "num_input_tokens_seen": 51073896, + "step": 771 + }, + { + "epoch": 0.07216736088360556, + "loss": 1.8947341442108154, + "loss_ce": 0.008992090821266174, + "loss_iou": 0.7890625, + "loss_num": 0.062255859375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 51073896, + "step": 771 + }, + { + "epoch": 0.07226096316750129, + "grad_norm": 30.103145599365234, + "learning_rate": 5e-05, + "loss": 1.8469, + "num_input_tokens_seen": 51139272, + "step": 772 + }, + { + "epoch": 0.07226096316750129, + "loss": 1.950621247291565, + "loss_ce": 0.006285249255597591, + "loss_iou": 0.85546875, + "loss_num": 0.046142578125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 51139272, + "step": 772 + }, + { + "epoch": 0.07235456545139701, + "grad_norm": 17.932331085205078, + "learning_rate": 5e-05, + "loss": 2.3164, + "num_input_tokens_seen": 51206216, + "step": 773 + }, + { + "epoch": 0.07235456545139701, + "loss": 2.350923538208008, + "loss_ce": 0.008150112815201283, + "loss_iou": 1.046875, + "loss_num": 0.049560546875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 51206216, + "step": 773 + }, + { + "epoch": 0.07244816773529274, + "grad_norm": 12.034122467041016, + "learning_rate": 5e-05, + "loss": 1.6248, + "num_input_tokens_seen": 51271780, + "step": 774 + }, + { + "epoch": 0.07244816773529274, + "loss": 1.6848853826522827, + "loss_ce": 0.0032448233105242252, + "loss_iou": 0.7421875, + "loss_num": 0.039794921875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 51271780, + "step": 774 + }, + { + "epoch": 0.07254177001918846, + "grad_norm": 14.1908540725708, + "learning_rate": 5e-05, + "loss": 1.6863, + "num_input_tokens_seen": 51338512, + "step": 775 + }, + { + "epoch": 0.07254177001918846, + "loss": 1.6020501852035522, + "loss_ce": 0.002440792042762041, + "loss_iou": 0.71875, + "loss_num": 0.032958984375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 51338512, + "step": 775 + }, + { + "epoch": 0.0726353723030842, + "grad_norm": 15.499090194702148, + "learning_rate": 5e-05, + "loss": 1.7165, + "num_input_tokens_seen": 51403940, + "step": 776 + }, + { + "epoch": 0.0726353723030842, + "loss": 1.53767728805542, + "loss_ce": 0.00398589763790369, + "loss_iou": 0.6328125, + "loss_num": 0.05419921875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 51403940, + "step": 776 + }, + { + "epoch": 0.07272897458697992, + "grad_norm": 22.869251251220703, + "learning_rate": 5e-05, + "loss": 1.8355, + "num_input_tokens_seen": 51469816, + "step": 777 + }, + { + "epoch": 0.07272897458697992, + "loss": 2.0655317306518555, + "loss_ce": 0.006937808357179165, + "loss_iou": 0.859375, + "loss_num": 0.06884765625, + "loss_xval": 2.0625, + "num_input_tokens_seen": 51469816, + "step": 777 + }, + { + "epoch": 0.07282257687087565, + "grad_norm": 19.780359268188477, + "learning_rate": 5e-05, + "loss": 1.9059, + "num_input_tokens_seen": 51536020, + "step": 778 + }, + { + "epoch": 0.07282257687087565, + "loss": 1.9265518188476562, + "loss_ce": 0.003700188361108303, + "loss_iou": 0.8125, + "loss_num": 0.05908203125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 51536020, + "step": 778 + }, + { + "epoch": 0.07291617915477137, + "grad_norm": 20.66790771484375, + "learning_rate": 5e-05, + "loss": 1.5468, + "num_input_tokens_seen": 51603044, + "step": 779 + }, + { + "epoch": 0.07291617915477137, + "loss": 1.4181262254714966, + "loss_ce": 0.007634267210960388, + "loss_iou": 0.61328125, + "loss_num": 0.03662109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 51603044, + "step": 779 + }, + { + "epoch": 0.0730097814386671, + "grad_norm": 14.053909301757812, + "learning_rate": 5e-05, + "loss": 2.1016, + "num_input_tokens_seen": 51669320, + "step": 780 + }, + { + "epoch": 0.0730097814386671, + "loss": 2.0850119590759277, + "loss_ce": 0.008840063586831093, + "loss_iou": 0.86328125, + "loss_num": 0.0693359375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 51669320, + "step": 780 + }, + { + "epoch": 0.07310338372256284, + "grad_norm": 13.39023494720459, + "learning_rate": 5e-05, + "loss": 1.5063, + "num_input_tokens_seen": 51736344, + "step": 781 + }, + { + "epoch": 0.07310338372256284, + "loss": 1.3768768310546875, + "loss_ce": 0.0038299686275422573, + "loss_iou": 0.62109375, + "loss_num": 0.0255126953125, + "loss_xval": 1.375, + "num_input_tokens_seen": 51736344, + "step": 781 + }, + { + "epoch": 0.07319698600645856, + "grad_norm": 12.973723411560059, + "learning_rate": 5e-05, + "loss": 1.7162, + "num_input_tokens_seen": 51801984, + "step": 782 + }, + { + "epoch": 0.07319698600645856, + "loss": 1.7734899520874023, + "loss_ce": 0.004935131408274174, + "loss_iou": 0.77734375, + "loss_num": 0.043212890625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 51801984, + "step": 782 + }, + { + "epoch": 0.07329058829035429, + "grad_norm": 18.570348739624023, + "learning_rate": 5e-05, + "loss": 1.6957, + "num_input_tokens_seen": 51867540, + "step": 783 + }, + { + "epoch": 0.07329058829035429, + "loss": 1.6712414026260376, + "loss_ce": 0.002296093385666609, + "loss_iou": 0.703125, + "loss_num": 0.052734375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 51867540, + "step": 783 + }, + { + "epoch": 0.07338419057425001, + "grad_norm": 11.181182861328125, + "learning_rate": 5e-05, + "loss": 1.674, + "num_input_tokens_seen": 51934840, + "step": 784 + }, + { + "epoch": 0.07338419057425001, + "loss": 1.7133533954620361, + "loss_ce": 0.005345569923520088, + "loss_iou": 0.7734375, + "loss_num": 0.03173828125, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 51934840, + "step": 784 + }, + { + "epoch": 0.07347779285814574, + "grad_norm": 10.359596252441406, + "learning_rate": 5e-05, + "loss": 1.7389, + "num_input_tokens_seen": 52002108, + "step": 785 + }, + { + "epoch": 0.07347779285814574, + "loss": 1.7151685953140259, + "loss_ce": 0.00520765595138073, + "loss_iou": 0.75, + "loss_num": 0.0419921875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 52002108, + "step": 785 + }, + { + "epoch": 0.07357139514204146, + "grad_norm": 16.918203353881836, + "learning_rate": 5e-05, + "loss": 1.7026, + "num_input_tokens_seen": 52068252, + "step": 786 + }, + { + "epoch": 0.07357139514204146, + "loss": 1.8131422996520996, + "loss_ce": 0.005525010172277689, + "loss_iou": 0.76171875, + "loss_num": 0.056640625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 52068252, + "step": 786 + }, + { + "epoch": 0.0736649974259372, + "grad_norm": 18.828004837036133, + "learning_rate": 5e-05, + "loss": 1.7878, + "num_input_tokens_seen": 52135224, + "step": 787 + }, + { + "epoch": 0.0736649974259372, + "loss": 2.062363862991333, + "loss_ce": 0.003770082723349333, + "loss_iou": 0.890625, + "loss_num": 0.055908203125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 52135224, + "step": 787 + }, + { + "epoch": 0.07375859970983292, + "grad_norm": 21.628116607666016, + "learning_rate": 5e-05, + "loss": 1.8181, + "num_input_tokens_seen": 52201892, + "step": 788 + }, + { + "epoch": 0.07375859970983292, + "loss": 1.8004341125488281, + "loss_ce": 0.0035592170897871256, + "loss_iou": 0.76953125, + "loss_num": 0.051025390625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 52201892, + "step": 788 + }, + { + "epoch": 0.07385220199372865, + "grad_norm": 51.08279037475586, + "learning_rate": 5e-05, + "loss": 1.6725, + "num_input_tokens_seen": 52267464, + "step": 789 + }, + { + "epoch": 0.07385220199372865, + "loss": 1.689554214477539, + "loss_ce": 0.002542572095990181, + "loss_iou": 0.73828125, + "loss_num": 0.042236328125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 52267464, + "step": 789 + }, + { + "epoch": 0.07394580427762437, + "grad_norm": 11.357071876525879, + "learning_rate": 5e-05, + "loss": 2.0696, + "num_input_tokens_seen": 52333484, + "step": 790 + }, + { + "epoch": 0.07394580427762437, + "loss": 1.8198058605194092, + "loss_ce": 0.014508059248328209, + "loss_iou": 0.6953125, + "loss_num": 0.083984375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 52333484, + "step": 790 + }, + { + "epoch": 0.0740394065615201, + "grad_norm": 11.868918418884277, + "learning_rate": 5e-05, + "loss": 2.0323, + "num_input_tokens_seen": 52400340, + "step": 791 + }, + { + "epoch": 0.0740394065615201, + "loss": 2.188082695007324, + "loss_ce": 0.005465551745146513, + "loss_iou": 0.88671875, + "loss_num": 0.08251953125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 52400340, + "step": 791 + }, + { + "epoch": 0.07413300884541583, + "grad_norm": 12.103880882263184, + "learning_rate": 5e-05, + "loss": 1.5496, + "num_input_tokens_seen": 52467120, + "step": 792 + }, + { + "epoch": 0.07413300884541583, + "loss": 1.575488567352295, + "loss_ce": 0.007129143923521042, + "loss_iou": 0.671875, + "loss_num": 0.04541015625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 52467120, + "step": 792 + }, + { + "epoch": 0.07422661112931156, + "grad_norm": 18.401817321777344, + "learning_rate": 5e-05, + "loss": 1.3835, + "num_input_tokens_seen": 52533096, + "step": 793 + }, + { + "epoch": 0.07422661112931156, + "loss": 1.6278660297393799, + "loss_ce": 0.0077488478273153305, + "loss_iou": 0.6875, + "loss_num": 0.049072265625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 52533096, + "step": 793 + }, + { + "epoch": 0.07432021341320728, + "grad_norm": 15.680350303649902, + "learning_rate": 5e-05, + "loss": 1.712, + "num_input_tokens_seen": 52598404, + "step": 794 + }, + { + "epoch": 0.07432021341320728, + "loss": 1.5534054040908813, + "loss_ce": 0.003600743133574724, + "loss_iou": 0.6875, + "loss_num": 0.0341796875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 52598404, + "step": 794 + }, + { + "epoch": 0.07441381569710301, + "grad_norm": 14.900457382202148, + "learning_rate": 5e-05, + "loss": 1.7548, + "num_input_tokens_seen": 52664512, + "step": 795 + }, + { + "epoch": 0.07441381569710301, + "loss": 1.6482797861099243, + "loss_ce": 0.004725020378828049, + "loss_iou": 0.72265625, + "loss_num": 0.03955078125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 52664512, + "step": 795 + }, + { + "epoch": 0.07450741798099873, + "grad_norm": 22.29669189453125, + "learning_rate": 5e-05, + "loss": 1.6021, + "num_input_tokens_seen": 52730696, + "step": 796 + }, + { + "epoch": 0.07450741798099873, + "loss": 1.7194867134094238, + "loss_ce": 0.0026898817159235477, + "loss_iou": 0.7578125, + "loss_num": 0.041015625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 52730696, + "step": 796 + }, + { + "epoch": 0.07460102026489446, + "grad_norm": 20.31715202331543, + "learning_rate": 5e-05, + "loss": 1.8804, + "num_input_tokens_seen": 52796632, + "step": 797 + }, + { + "epoch": 0.07460102026489446, + "loss": 1.814350962638855, + "loss_ce": 0.0018509826622903347, + "loss_iou": 0.796875, + "loss_num": 0.0439453125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 52796632, + "step": 797 + }, + { + "epoch": 0.0746946225487902, + "grad_norm": 12.104740142822266, + "learning_rate": 5e-05, + "loss": 1.433, + "num_input_tokens_seen": 52862996, + "step": 798 + }, + { + "epoch": 0.0746946225487902, + "loss": 1.4822030067443848, + "loss_ce": 0.0046640620566904545, + "loss_iou": 0.65234375, + "loss_num": 0.034423828125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 52862996, + "step": 798 + }, + { + "epoch": 0.07478822483268592, + "grad_norm": 19.013381958007812, + "learning_rate": 5e-05, + "loss": 1.761, + "num_input_tokens_seen": 52930324, + "step": 799 + }, + { + "epoch": 0.07478822483268592, + "loss": 1.7661657333374023, + "loss_ce": 0.003470430849120021, + "loss_iou": 0.78515625, + "loss_num": 0.03857421875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 52930324, + "step": 799 + }, + { + "epoch": 0.07488182711658165, + "grad_norm": 12.877026557922363, + "learning_rate": 5e-05, + "loss": 1.849, + "num_input_tokens_seen": 52996436, + "step": 800 + }, + { + "epoch": 0.07488182711658165, + "loss": 1.742581844329834, + "loss_ce": 0.0033240043558180332, + "loss_iou": 0.77734375, + "loss_num": 0.037353515625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 52996436, + "step": 800 + }, + { + "epoch": 0.07497542940047737, + "grad_norm": 13.212465286254883, + "learning_rate": 5e-05, + "loss": 1.387, + "num_input_tokens_seen": 53063164, + "step": 801 + }, + { + "epoch": 0.07497542940047737, + "loss": 1.4713304042816162, + "loss_ce": 0.0025803588796406984, + "loss_iou": 0.66015625, + "loss_num": 0.0299072265625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 53063164, + "step": 801 + }, + { + "epoch": 0.0750690316843731, + "grad_norm": 32.19801330566406, + "learning_rate": 5e-05, + "loss": 1.8543, + "num_input_tokens_seen": 53127832, + "step": 802 + }, + { + "epoch": 0.0750690316843731, + "loss": 1.6698976755142212, + "loss_ce": 0.004370349459350109, + "loss_iou": 0.71484375, + "loss_num": 0.047119140625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 53127832, + "step": 802 + }, + { + "epoch": 0.07516263396826883, + "grad_norm": 12.36397647857666, + "learning_rate": 5e-05, + "loss": 2.0378, + "num_input_tokens_seen": 53193568, + "step": 803 + }, + { + "epoch": 0.07516263396826883, + "loss": 2.122973918914795, + "loss_ce": 0.002856625011190772, + "loss_iou": 0.89453125, + "loss_num": 0.06689453125, + "loss_xval": 2.125, + "num_input_tokens_seen": 53193568, + "step": 803 + }, + { + "epoch": 0.07525623625216456, + "grad_norm": 15.287789344787598, + "learning_rate": 5e-05, + "loss": 1.7173, + "num_input_tokens_seen": 53258844, + "step": 804 + }, + { + "epoch": 0.07525623625216456, + "loss": 1.6398072242736816, + "loss_ce": 0.0016236234223470092, + "loss_iou": 0.6796875, + "loss_num": 0.0556640625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 53258844, + "step": 804 + }, + { + "epoch": 0.07534983853606028, + "grad_norm": 27.020416259765625, + "learning_rate": 5e-05, + "loss": 1.4972, + "num_input_tokens_seen": 53324624, + "step": 805 + }, + { + "epoch": 0.07534983853606028, + "loss": 1.4142091274261475, + "loss_ce": 0.0016114177415147424, + "loss_iou": 0.6015625, + "loss_num": 0.04248046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 53324624, + "step": 805 + }, + { + "epoch": 0.075443440819956, + "grad_norm": 59.48409652709961, + "learning_rate": 5e-05, + "loss": 1.789, + "num_input_tokens_seen": 53390328, + "step": 806 + }, + { + "epoch": 0.075443440819956, + "loss": 1.6413440704345703, + "loss_ce": 0.002916322322562337, + "loss_iou": 0.6875, + "loss_num": 0.05224609375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 53390328, + "step": 806 + }, + { + "epoch": 0.07553704310385173, + "grad_norm": 10.196464538574219, + "learning_rate": 5e-05, + "loss": 1.3888, + "num_input_tokens_seen": 53457428, + "step": 807 + }, + { + "epoch": 0.07553704310385173, + "loss": 1.3960691690444946, + "loss_ce": 0.004467627964913845, + "loss_iou": 0.625, + "loss_num": 0.0281982421875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 53457428, + "step": 807 + }, + { + "epoch": 0.07563064538774746, + "grad_norm": 27.761362075805664, + "learning_rate": 5e-05, + "loss": 1.6037, + "num_input_tokens_seen": 53522556, + "step": 808 + }, + { + "epoch": 0.07563064538774746, + "loss": 1.5643582344055176, + "loss_ce": 0.00588653702288866, + "loss_iou": 0.65234375, + "loss_num": 0.050048828125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 53522556, + "step": 808 + }, + { + "epoch": 0.0757242476716432, + "grad_norm": 13.68408203125, + "learning_rate": 5e-05, + "loss": 1.8785, + "num_input_tokens_seen": 53589788, + "step": 809 + }, + { + "epoch": 0.0757242476716432, + "loss": 1.970529317855835, + "loss_ce": 0.0037324335426092148, + "loss_iou": 0.828125, + "loss_num": 0.06103515625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 53589788, + "step": 809 + }, + { + "epoch": 0.07581784995553892, + "grad_norm": 33.96290969848633, + "learning_rate": 5e-05, + "loss": 1.7861, + "num_input_tokens_seen": 53656668, + "step": 810 + }, + { + "epoch": 0.07581784995553892, + "loss": 1.8013924360275269, + "loss_ce": 0.010376828722655773, + "loss_iou": 0.76953125, + "loss_num": 0.050048828125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 53656668, + "step": 810 + }, + { + "epoch": 0.07591145223943464, + "grad_norm": 19.82717514038086, + "learning_rate": 5e-05, + "loss": 1.7128, + "num_input_tokens_seen": 53722308, + "step": 811 + }, + { + "epoch": 0.07591145223943464, + "loss": 1.6208107471466064, + "loss_ce": 0.0026467025745660067, + "loss_iou": 0.72265625, + "loss_num": 0.03515625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 53722308, + "step": 811 + }, + { + "epoch": 0.07600505452333037, + "grad_norm": 11.903279304504395, + "learning_rate": 5e-05, + "loss": 1.5115, + "num_input_tokens_seen": 53788076, + "step": 812 + }, + { + "epoch": 0.07600505452333037, + "loss": 1.4619097709655762, + "loss_ce": 0.00561102107167244, + "loss_iou": 0.59375, + "loss_num": 0.0546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 53788076, + "step": 812 + }, + { + "epoch": 0.07609865680722609, + "grad_norm": 17.889984130859375, + "learning_rate": 5e-05, + "loss": 1.5059, + "num_input_tokens_seen": 53855092, + "step": 813 + }, + { + "epoch": 0.07609865680722609, + "loss": 1.3990848064422607, + "loss_ce": 0.0035768812522292137, + "loss_iou": 0.62890625, + "loss_num": 0.028076171875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 53855092, + "step": 813 + }, + { + "epoch": 0.07619225909112182, + "grad_norm": 23.897436141967773, + "learning_rate": 5e-05, + "loss": 1.91, + "num_input_tokens_seen": 53920380, + "step": 814 + }, + { + "epoch": 0.07619225909112182, + "loss": 1.6896394491195679, + "loss_ce": 0.0065340036526322365, + "loss_iou": 0.71484375, + "loss_num": 0.051025390625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 53920380, + "step": 814 + }, + { + "epoch": 0.07628586137501756, + "grad_norm": 15.09005355834961, + "learning_rate": 5e-05, + "loss": 1.6063, + "num_input_tokens_seen": 53986744, + "step": 815 + }, + { + "epoch": 0.07628586137501756, + "loss": 1.7806751728057861, + "loss_ce": 0.005284649785608053, + "loss_iou": 0.7578125, + "loss_num": 0.052001953125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 53986744, + "step": 815 + }, + { + "epoch": 0.07637946365891328, + "grad_norm": 35.94722366333008, + "learning_rate": 5e-05, + "loss": 1.7237, + "num_input_tokens_seen": 54052980, + "step": 816 + }, + { + "epoch": 0.07637946365891328, + "loss": 1.668999195098877, + "loss_ce": 0.006889849901199341, + "loss_iou": 0.74609375, + "loss_num": 0.0341796875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 54052980, + "step": 816 + }, + { + "epoch": 0.076473065942809, + "grad_norm": 12.649584770202637, + "learning_rate": 5e-05, + "loss": 2.0055, + "num_input_tokens_seen": 54119408, + "step": 817 + }, + { + "epoch": 0.076473065942809, + "loss": 1.992591142654419, + "loss_ce": 0.0033332628663629293, + "loss_iou": 0.8671875, + "loss_num": 0.052001953125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 54119408, + "step": 817 + }, + { + "epoch": 0.07656666822670473, + "grad_norm": 24.400339126586914, + "learning_rate": 5e-05, + "loss": 1.6179, + "num_input_tokens_seen": 54185236, + "step": 818 + }, + { + "epoch": 0.07656666822670473, + "loss": 1.61531400680542, + "loss_ce": 0.0047183167189359665, + "loss_iou": 0.59375, + "loss_num": 0.08544921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 54185236, + "step": 818 + }, + { + "epoch": 0.07666027051060045, + "grad_norm": 10.225992202758789, + "learning_rate": 5e-05, + "loss": 1.4889, + "num_input_tokens_seen": 54251224, + "step": 819 + }, + { + "epoch": 0.07666027051060045, + "loss": 1.5908461809158325, + "loss_ce": 0.0068618240766227245, + "loss_iou": 0.6796875, + "loss_num": 0.04443359375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 54251224, + "step": 819 + }, + { + "epoch": 0.07675387279449619, + "grad_norm": 10.83730697631836, + "learning_rate": 5e-05, + "loss": 2.0075, + "num_input_tokens_seen": 54317208, + "step": 820 + }, + { + "epoch": 0.07675387279449619, + "loss": 2.03198504447937, + "loss_ce": 0.00464137177914381, + "loss_iou": 0.86328125, + "loss_num": 0.06005859375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 54317208, + "step": 820 + }, + { + "epoch": 0.07684747507839192, + "grad_norm": 40.60268783569336, + "learning_rate": 5e-05, + "loss": 1.7607, + "num_input_tokens_seen": 54382544, + "step": 821 + }, + { + "epoch": 0.07684747507839192, + "loss": 1.7447590827941895, + "loss_ce": 0.005501284264028072, + "loss_iou": 0.78125, + "loss_num": 0.03466796875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 54382544, + "step": 821 + }, + { + "epoch": 0.07694107736228764, + "grad_norm": 8.829503059387207, + "learning_rate": 5e-05, + "loss": 2.058, + "num_input_tokens_seen": 54448428, + "step": 822 + }, + { + "epoch": 0.07694107736228764, + "loss": 2.0724472999572754, + "loss_ce": 0.002134760608896613, + "loss_iou": 0.890625, + "loss_num": 0.057373046875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 54448428, + "step": 822 + }, + { + "epoch": 0.07703467964618337, + "grad_norm": 11.689741134643555, + "learning_rate": 5e-05, + "loss": 1.9954, + "num_input_tokens_seen": 54515384, + "step": 823 + }, + { + "epoch": 0.07703467964618337, + "loss": 2.0363917350769043, + "loss_ce": 0.003188550937920809, + "loss_iou": 0.9140625, + "loss_num": 0.0400390625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 54515384, + "step": 823 + }, + { + "epoch": 0.07712828193007909, + "grad_norm": 17.452810287475586, + "learning_rate": 5e-05, + "loss": 1.8118, + "num_input_tokens_seen": 54582156, + "step": 824 + }, + { + "epoch": 0.07712828193007909, + "loss": 1.853980541229248, + "loss_ce": 0.005347815342247486, + "loss_iou": 0.796875, + "loss_num": 0.05078125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 54582156, + "step": 824 + }, + { + "epoch": 0.07722188421397481, + "grad_norm": 48.813594818115234, + "learning_rate": 5e-05, + "loss": 1.6049, + "num_input_tokens_seen": 54647936, + "step": 825 + }, + { + "epoch": 0.07722188421397481, + "loss": 1.3968123197555542, + "loss_ce": 0.004966678563505411, + "loss_iou": 0.5859375, + "loss_num": 0.043701171875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 54647936, + "step": 825 + }, + { + "epoch": 0.07731548649787055, + "grad_norm": 23.729625701904297, + "learning_rate": 5e-05, + "loss": 1.6444, + "num_input_tokens_seen": 54711476, + "step": 826 + }, + { + "epoch": 0.07731548649787055, + "loss": 1.8388928174972534, + "loss_ce": 0.003931805491447449, + "loss_iou": 0.7734375, + "loss_num": 0.0576171875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 54711476, + "step": 826 + }, + { + "epoch": 0.07740908878176628, + "grad_norm": 15.217606544494629, + "learning_rate": 5e-05, + "loss": 1.6576, + "num_input_tokens_seen": 54778988, + "step": 827 + }, + { + "epoch": 0.07740908878176628, + "loss": 1.6610138416290283, + "loss_ce": 0.0028106600511819124, + "loss_iou": 0.734375, + "loss_num": 0.037109375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 54778988, + "step": 827 + }, + { + "epoch": 0.077502691065662, + "grad_norm": 17.45100212097168, + "learning_rate": 5e-05, + "loss": 1.7092, + "num_input_tokens_seen": 54845004, + "step": 828 + }, + { + "epoch": 0.077502691065662, + "loss": 1.8006932735443115, + "loss_ce": 0.00418446259573102, + "loss_iou": 0.7578125, + "loss_num": 0.055908203125, + "loss_xval": 1.796875, + "num_input_tokens_seen": 54845004, + "step": 828 + }, + { + "epoch": 0.07759629334955773, + "grad_norm": 19.25678825378418, + "learning_rate": 5e-05, + "loss": 1.6017, + "num_input_tokens_seen": 54910868, + "step": 829 + }, + { + "epoch": 0.07759629334955773, + "loss": 1.8255311250686646, + "loss_ce": 0.004242067225277424, + "loss_iou": 0.78125, + "loss_num": 0.05126953125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 54910868, + "step": 829 + }, + { + "epoch": 0.07768989563345345, + "grad_norm": 18.64685821533203, + "learning_rate": 5e-05, + "loss": 1.8397, + "num_input_tokens_seen": 54976972, + "step": 830 + }, + { + "epoch": 0.07768989563345345, + "loss": 1.816882848739624, + "loss_ce": 0.0034062829799950123, + "loss_iou": 0.78125, + "loss_num": 0.05126953125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 54976972, + "step": 830 + }, + { + "epoch": 0.07778349791734919, + "grad_norm": 18.513874053955078, + "learning_rate": 5e-05, + "loss": 1.6702, + "num_input_tokens_seen": 55042652, + "step": 831 + }, + { + "epoch": 0.07778349791734919, + "loss": 1.558748722076416, + "loss_ce": 0.005037764087319374, + "loss_iou": 0.703125, + "loss_num": 0.0296630859375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 55042652, + "step": 831 + }, + { + "epoch": 0.07787710020124491, + "grad_norm": 34.15970230102539, + "learning_rate": 5e-05, + "loss": 1.7013, + "num_input_tokens_seen": 55109936, + "step": 832 + }, + { + "epoch": 0.07787710020124491, + "loss": 1.7062803506851196, + "loss_ce": 0.002178819617256522, + "loss_iou": 0.74609375, + "loss_num": 0.0419921875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 55109936, + "step": 832 + }, + { + "epoch": 0.07797070248514064, + "grad_norm": 25.373519897460938, + "learning_rate": 5e-05, + "loss": 1.5804, + "num_input_tokens_seen": 55175960, + "step": 833 + }, + { + "epoch": 0.07797070248514064, + "loss": 1.596665859222412, + "loss_ce": 0.0009626125684008002, + "loss_iou": 0.6875, + "loss_num": 0.043212890625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 55175960, + "step": 833 + }, + { + "epoch": 0.07806430476903636, + "grad_norm": 12.413987159729004, + "learning_rate": 5e-05, + "loss": 1.8258, + "num_input_tokens_seen": 55242060, + "step": 834 + }, + { + "epoch": 0.07806430476903636, + "loss": 1.925345778465271, + "loss_ce": 0.00542389415204525, + "loss_iou": 0.828125, + "loss_num": 0.052734375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 55242060, + "step": 834 + }, + { + "epoch": 0.07815790705293209, + "grad_norm": 14.300704956054688, + "learning_rate": 5e-05, + "loss": 1.6578, + "num_input_tokens_seen": 55308092, + "step": 835 + }, + { + "epoch": 0.07815790705293209, + "loss": 1.5969938039779663, + "loss_ce": 0.002267252653837204, + "loss_iou": 0.6953125, + "loss_num": 0.040283203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 55308092, + "step": 835 + }, + { + "epoch": 0.07825150933682781, + "grad_norm": 22.78268051147461, + "learning_rate": 5e-05, + "loss": 1.9089, + "num_input_tokens_seen": 55373980, + "step": 836 + }, + { + "epoch": 0.07825150933682781, + "loss": 2.0152018070220947, + "loss_ce": 0.004459593910723925, + "loss_iou": 0.84765625, + "loss_num": 0.0634765625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 55373980, + "step": 836 + }, + { + "epoch": 0.07834511162072355, + "grad_norm": 16.693683624267578, + "learning_rate": 5e-05, + "loss": 2.1208, + "num_input_tokens_seen": 55439788, + "step": 837 + }, + { + "epoch": 0.07834511162072355, + "loss": 2.102088451385498, + "loss_ce": 0.0073621273040771484, + "loss_iou": 0.90625, + "loss_num": 0.0556640625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 55439788, + "step": 837 + }, + { + "epoch": 0.07843871390461928, + "grad_norm": 8.320754051208496, + "learning_rate": 5e-05, + "loss": 1.426, + "num_input_tokens_seen": 55506704, + "step": 838 + }, + { + "epoch": 0.07843871390461928, + "loss": 1.4811341762542725, + "loss_ce": 0.0055482941679656506, + "loss_iou": 0.62109375, + "loss_num": 0.047119140625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 55506704, + "step": 838 + }, + { + "epoch": 0.078532316188515, + "grad_norm": 10.999988555908203, + "learning_rate": 5e-05, + "loss": 1.4888, + "num_input_tokens_seen": 55572076, + "step": 839 + }, + { + "epoch": 0.078532316188515, + "loss": 1.1216778755187988, + "loss_ce": 0.0038800942711532116, + "loss_iou": 0.421875, + "loss_num": 0.054443359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 55572076, + "step": 839 + }, + { + "epoch": 0.07862591847241072, + "grad_norm": 15.208309173583984, + "learning_rate": 5e-05, + "loss": 1.6326, + "num_input_tokens_seen": 55637976, + "step": 840 + }, + { + "epoch": 0.07862591847241072, + "loss": 1.6669137477874756, + "loss_ce": 0.003827746957540512, + "loss_iou": 0.734375, + "loss_num": 0.03955078125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 55637976, + "step": 840 + }, + { + "epoch": 0.07871952075630645, + "grad_norm": 22.545604705810547, + "learning_rate": 5e-05, + "loss": 1.641, + "num_input_tokens_seen": 55704360, + "step": 841 + }, + { + "epoch": 0.07871952075630645, + "loss": 1.773759126663208, + "loss_ce": 0.002274680184200406, + "loss_iou": 0.765625, + "loss_num": 0.048583984375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 55704360, + "step": 841 + }, + { + "epoch": 0.07881312304020219, + "grad_norm": 19.781217575073242, + "learning_rate": 5e-05, + "loss": 1.6991, + "num_input_tokens_seen": 55770648, + "step": 842 + }, + { + "epoch": 0.07881312304020219, + "loss": 1.7591058015823364, + "loss_ce": 0.006664406508207321, + "loss_iou": 0.76953125, + "loss_num": 0.043212890625, + "loss_xval": 1.75, + "num_input_tokens_seen": 55770648, + "step": 842 + }, + { + "epoch": 0.07890672532409791, + "grad_norm": 15.822068214416504, + "learning_rate": 5e-05, + "loss": 1.7774, + "num_input_tokens_seen": 55837896, + "step": 843 + }, + { + "epoch": 0.07890672532409791, + "loss": 1.8255829811096191, + "loss_ce": 0.004293967038393021, + "loss_iou": 0.796875, + "loss_num": 0.046630859375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 55837896, + "step": 843 + }, + { + "epoch": 0.07900032760799364, + "grad_norm": 14.252395629882812, + "learning_rate": 5e-05, + "loss": 1.6272, + "num_input_tokens_seen": 55904532, + "step": 844 + }, + { + "epoch": 0.07900032760799364, + "loss": 1.6053745746612549, + "loss_ce": 0.0028354646638035774, + "loss_iou": 0.6796875, + "loss_num": 0.049560546875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 55904532, + "step": 844 + }, + { + "epoch": 0.07909392989188936, + "grad_norm": 27.333084106445312, + "learning_rate": 5e-05, + "loss": 1.5755, + "num_input_tokens_seen": 55971000, + "step": 845 + }, + { + "epoch": 0.07909392989188936, + "loss": 1.3764841556549072, + "loss_ce": 0.002460794523358345, + "loss_iou": 0.62890625, + "loss_num": 0.022216796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 55971000, + "step": 845 + }, + { + "epoch": 0.07918753217578509, + "grad_norm": 16.1622314453125, + "learning_rate": 5e-05, + "loss": 2.0193, + "num_input_tokens_seen": 56037060, + "step": 846 + }, + { + "epoch": 0.07918753217578509, + "loss": 2.0520763397216797, + "loss_ce": 0.0052011888474226, + "loss_iou": 0.8671875, + "loss_num": 0.06298828125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 56037060, + "step": 846 + }, + { + "epoch": 0.07928113445968081, + "grad_norm": 13.176560401916504, + "learning_rate": 5e-05, + "loss": 1.6272, + "num_input_tokens_seen": 56102972, + "step": 847 + }, + { + "epoch": 0.07928113445968081, + "loss": 1.5083166360855103, + "loss_ce": 0.0029455760959535837, + "loss_iou": 0.6328125, + "loss_num": 0.047607421875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 56102972, + "step": 847 + }, + { + "epoch": 0.07937473674357655, + "grad_norm": 25.884740829467773, + "learning_rate": 5e-05, + "loss": 1.7202, + "num_input_tokens_seen": 56168500, + "step": 848 + }, + { + "epoch": 0.07937473674357655, + "loss": 1.7484092712402344, + "loss_ce": 0.0023155626840889454, + "loss_iou": 0.765625, + "loss_num": 0.04248046875, + "loss_xval": 1.75, + "num_input_tokens_seen": 56168500, + "step": 848 + }, + { + "epoch": 0.07946833902747227, + "grad_norm": 15.529861450195312, + "learning_rate": 5e-05, + "loss": 1.9193, + "num_input_tokens_seen": 56234944, + "step": 849 + }, + { + "epoch": 0.07946833902747227, + "loss": 1.848803997039795, + "loss_ce": 0.005054059904068708, + "loss_iou": 0.828125, + "loss_num": 0.038330078125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 56234944, + "step": 849 + }, + { + "epoch": 0.079561941311368, + "grad_norm": 20.716421127319336, + "learning_rate": 5e-05, + "loss": 1.7752, + "num_input_tokens_seen": 56300952, + "step": 850 + }, + { + "epoch": 0.079561941311368, + "loss": 1.8582499027252197, + "loss_ce": 0.006687378976494074, + "loss_iou": 0.79296875, + "loss_num": 0.052734375, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 56300952, + "step": 850 + }, + { + "epoch": 0.07965554359526372, + "grad_norm": 17.88934898376465, + "learning_rate": 5e-05, + "loss": 1.7198, + "num_input_tokens_seen": 56367440, + "step": 851 + }, + { + "epoch": 0.07965554359526372, + "loss": 1.7001900672912598, + "loss_ce": 0.0029244059696793556, + "loss_iou": 0.7109375, + "loss_num": 0.0556640625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 56367440, + "step": 851 + }, + { + "epoch": 0.07974914587915945, + "grad_norm": 28.757474899291992, + "learning_rate": 5e-05, + "loss": 1.9239, + "num_input_tokens_seen": 56434612, + "step": 852 + }, + { + "epoch": 0.07974914587915945, + "loss": 1.8636595010757446, + "loss_ce": 0.003307967446744442, + "loss_iou": 0.8359375, + "loss_num": 0.03857421875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 56434612, + "step": 852 + }, + { + "epoch": 0.07984274816305517, + "grad_norm": 12.52474308013916, + "learning_rate": 5e-05, + "loss": 1.7685, + "num_input_tokens_seen": 56500432, + "step": 853 + }, + { + "epoch": 0.07984274816305517, + "loss": 1.8079160451889038, + "loss_ce": 0.003228507237508893, + "loss_iou": 0.8046875, + "loss_num": 0.038818359375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 56500432, + "step": 853 + }, + { + "epoch": 0.07993635044695091, + "grad_norm": 13.405757904052734, + "learning_rate": 5e-05, + "loss": 1.6683, + "num_input_tokens_seen": 56566408, + "step": 854 + }, + { + "epoch": 0.07993635044695091, + "loss": 1.7243707180023193, + "loss_ce": 0.0026910006999969482, + "loss_iou": 0.7578125, + "loss_num": 0.040283203125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 56566408, + "step": 854 + }, + { + "epoch": 0.08002995273084663, + "grad_norm": 9.93342399597168, + "learning_rate": 5e-05, + "loss": 1.4518, + "num_input_tokens_seen": 56633640, + "step": 855 + }, + { + "epoch": 0.08002995273084663, + "loss": 1.5757908821105957, + "loss_ce": 0.006455020979046822, + "loss_iou": 0.67578125, + "loss_num": 0.0439453125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 56633640, + "step": 855 + }, + { + "epoch": 0.08012355501474236, + "grad_norm": 11.159290313720703, + "learning_rate": 5e-05, + "loss": 1.3769, + "num_input_tokens_seen": 56698748, + "step": 856 + }, + { + "epoch": 0.08012355501474236, + "loss": 1.2927894592285156, + "loss_ce": 0.004337380640208721, + "loss_iou": 0.578125, + "loss_num": 0.0264892578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 56698748, + "step": 856 + }, + { + "epoch": 0.08021715729863808, + "grad_norm": 28.94257926940918, + "learning_rate": 5e-05, + "loss": 1.8194, + "num_input_tokens_seen": 56765084, + "step": 857 + }, + { + "epoch": 0.08021715729863808, + "loss": 1.9139653444290161, + "loss_ce": 0.00576228741556406, + "loss_iou": 0.84375, + "loss_num": 0.04345703125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 56765084, + "step": 857 + }, + { + "epoch": 0.08031075958253381, + "grad_norm": 14.588022232055664, + "learning_rate": 5e-05, + "loss": 1.9083, + "num_input_tokens_seen": 56831688, + "step": 858 + }, + { + "epoch": 0.08031075958253381, + "loss": 2.0022599697113037, + "loss_ce": 0.0061661312356591225, + "loss_iou": 0.8515625, + "loss_num": 0.059326171875, + "loss_xval": 2.0, + "num_input_tokens_seen": 56831688, + "step": 858 + }, + { + "epoch": 0.08040436186642955, + "grad_norm": 17.378747940063477, + "learning_rate": 5e-05, + "loss": 1.8547, + "num_input_tokens_seen": 56898100, + "step": 859 + }, + { + "epoch": 0.08040436186642955, + "loss": 2.0381417274475098, + "loss_ce": 0.0039619808085262775, + "loss_iou": 0.86328125, + "loss_num": 0.0615234375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 56898100, + "step": 859 + }, + { + "epoch": 0.08049796415032527, + "grad_norm": 23.05256462097168, + "learning_rate": 5e-05, + "loss": 1.5782, + "num_input_tokens_seen": 56964556, + "step": 860 + }, + { + "epoch": 0.08049796415032527, + "loss": 1.7002537250518799, + "loss_ce": 0.0020115277729928493, + "loss_iou": 0.76171875, + "loss_num": 0.03466796875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 56964556, + "step": 860 + }, + { + "epoch": 0.080591566434221, + "grad_norm": 13.92392635345459, + "learning_rate": 5e-05, + "loss": 1.8908, + "num_input_tokens_seen": 57029556, + "step": 861 + }, + { + "epoch": 0.080591566434221, + "loss": 1.8966186046600342, + "loss_ce": 0.0020873812027275562, + "loss_iou": 0.8515625, + "loss_num": 0.03759765625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 57029556, + "step": 861 + }, + { + "epoch": 0.08068516871811672, + "grad_norm": 43.778316497802734, + "learning_rate": 5e-05, + "loss": 1.6896, + "num_input_tokens_seen": 57096008, + "step": 862 + }, + { + "epoch": 0.08068516871811672, + "loss": 1.8149826526641846, + "loss_ce": 0.0063889408484101295, + "loss_iou": 0.77734375, + "loss_num": 0.05078125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 57096008, + "step": 862 + }, + { + "epoch": 0.08077877100201244, + "grad_norm": 38.19126892089844, + "learning_rate": 5e-05, + "loss": 1.7982, + "num_input_tokens_seen": 57162588, + "step": 863 + }, + { + "epoch": 0.08077877100201244, + "loss": 1.8195277452468872, + "loss_ce": 0.004586325958371162, + "loss_iou": 0.78515625, + "loss_num": 0.048828125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 57162588, + "step": 863 + }, + { + "epoch": 0.08087237328590817, + "grad_norm": 19.279468536376953, + "learning_rate": 5e-05, + "loss": 1.6604, + "num_input_tokens_seen": 57229352, + "step": 864 + }, + { + "epoch": 0.08087237328590817, + "loss": 1.5278937816619873, + "loss_ce": 0.0059212022460997105, + "loss_iou": 0.66796875, + "loss_num": 0.0380859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 57229352, + "step": 864 + }, + { + "epoch": 0.08096597556980391, + "grad_norm": 31.98632049560547, + "learning_rate": 5e-05, + "loss": 1.7721, + "num_input_tokens_seen": 57295448, + "step": 865 + }, + { + "epoch": 0.08096597556980391, + "loss": 1.9108335971832275, + "loss_ce": 0.005560180637985468, + "loss_iou": 0.796875, + "loss_num": 0.0625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 57295448, + "step": 865 + }, + { + "epoch": 0.08105957785369963, + "grad_norm": 12.68753433227539, + "learning_rate": 5e-05, + "loss": 1.7506, + "num_input_tokens_seen": 57361884, + "step": 866 + }, + { + "epoch": 0.08105957785369963, + "loss": 1.7101788520812988, + "loss_ce": 0.003147585317492485, + "loss_iou": 0.77734375, + "loss_num": 0.031494140625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 57361884, + "step": 866 + }, + { + "epoch": 0.08115318013759536, + "grad_norm": 15.463430404663086, + "learning_rate": 5e-05, + "loss": 1.4298, + "num_input_tokens_seen": 57427388, + "step": 867 + }, + { + "epoch": 0.08115318013759536, + "loss": 1.436558485031128, + "loss_ce": 0.0063826944679021835, + "loss_iou": 0.63671875, + "loss_num": 0.0306396484375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 57427388, + "step": 867 + }, + { + "epoch": 0.08124678242149108, + "grad_norm": 30.34566307067871, + "learning_rate": 5e-05, + "loss": 1.7791, + "num_input_tokens_seen": 57493328, + "step": 868 + }, + { + "epoch": 0.08124678242149108, + "loss": 1.914674162864685, + "loss_ce": 0.004517912864685059, + "loss_iou": 0.84765625, + "loss_num": 0.043212890625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 57493328, + "step": 868 + }, + { + "epoch": 0.0813403847053868, + "grad_norm": 14.137175559997559, + "learning_rate": 5e-05, + "loss": 1.8035, + "num_input_tokens_seen": 57560060, + "step": 869 + }, + { + "epoch": 0.0813403847053868, + "loss": 1.8605313301086426, + "loss_ce": 0.005062506068497896, + "loss_iou": 0.82421875, + "loss_num": 0.0419921875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 57560060, + "step": 869 + }, + { + "epoch": 0.08143398698928254, + "grad_norm": 16.45473289489746, + "learning_rate": 5e-05, + "loss": 1.668, + "num_input_tokens_seen": 57626524, + "step": 870 + }, + { + "epoch": 0.08143398698928254, + "loss": 1.745182991027832, + "loss_ce": 0.002995454939082265, + "loss_iou": 0.76953125, + "loss_num": 0.0400390625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 57626524, + "step": 870 + }, + { + "epoch": 0.08152758927317827, + "grad_norm": 11.80198860168457, + "learning_rate": 5e-05, + "loss": 1.3168, + "num_input_tokens_seen": 57693256, + "step": 871 + }, + { + "epoch": 0.08152758927317827, + "loss": 1.2056353092193604, + "loss_ce": 0.00934632495045662, + "loss_iou": 0.53125, + "loss_num": 0.0264892578125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 57693256, + "step": 871 + }, + { + "epoch": 0.081621191557074, + "grad_norm": 15.624382019042969, + "learning_rate": 5e-05, + "loss": 1.5162, + "num_input_tokens_seen": 57759600, + "step": 872 + }, + { + "epoch": 0.081621191557074, + "loss": 1.4846746921539307, + "loss_ce": 0.0071356394328176975, + "loss_iou": 0.59375, + "loss_num": 0.05810546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 57759600, + "step": 872 + }, + { + "epoch": 0.08171479384096972, + "grad_norm": 34.51239013671875, + "learning_rate": 5e-05, + "loss": 1.6347, + "num_input_tokens_seen": 57825212, + "step": 873 + }, + { + "epoch": 0.08171479384096972, + "loss": 1.7366607189178467, + "loss_ce": 0.00814506970345974, + "loss_iou": 0.7109375, + "loss_num": 0.0625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 57825212, + "step": 873 + }, + { + "epoch": 0.08180839612486544, + "grad_norm": 10.777180671691895, + "learning_rate": 5e-05, + "loss": 1.984, + "num_input_tokens_seen": 57891160, + "step": 874 + }, + { + "epoch": 0.08180839612486544, + "loss": 1.9887115955352783, + "loss_ce": 0.0043366048485040665, + "loss_iou": 0.8984375, + "loss_num": 0.03759765625, + "loss_xval": 1.984375, + "num_input_tokens_seen": 57891160, + "step": 874 + }, + { + "epoch": 0.08190199840876117, + "grad_norm": 20.751922607421875, + "learning_rate": 5e-05, + "loss": 1.5662, + "num_input_tokens_seen": 57955572, + "step": 875 + }, + { + "epoch": 0.08190199840876117, + "loss": 1.4291478395462036, + "loss_ce": 0.004831395577639341, + "loss_iou": 0.61328125, + "loss_num": 0.0400390625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 57955572, + "step": 875 + }, + { + "epoch": 0.0819956006926569, + "grad_norm": 19.744140625, + "learning_rate": 5e-05, + "loss": 1.898, + "num_input_tokens_seen": 58022716, + "step": 876 + }, + { + "epoch": 0.0819956006926569, + "loss": 2.0219719409942627, + "loss_ce": 0.007323550060391426, + "loss_iou": 0.859375, + "loss_num": 0.05859375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 58022716, + "step": 876 + }, + { + "epoch": 0.08208920297655263, + "grad_norm": 14.733153343200684, + "learning_rate": 5e-05, + "loss": 1.7235, + "num_input_tokens_seen": 58089180, + "step": 877 + }, + { + "epoch": 0.08208920297655263, + "loss": 1.681530237197876, + "loss_ce": 0.004772431682795286, + "loss_iou": 0.71484375, + "loss_num": 0.050048828125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 58089180, + "step": 877 + }, + { + "epoch": 0.08218280526044835, + "grad_norm": 12.678238868713379, + "learning_rate": 5e-05, + "loss": 1.4754, + "num_input_tokens_seen": 58155328, + "step": 878 + }, + { + "epoch": 0.08218280526044835, + "loss": 1.3437457084655762, + "loss_ce": 0.005855005234479904, + "loss_iou": 0.609375, + "loss_num": 0.0238037109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 58155328, + "step": 878 + }, + { + "epoch": 0.08227640754434408, + "grad_norm": 17.92420196533203, + "learning_rate": 5e-05, + "loss": 1.6522, + "num_input_tokens_seen": 58221740, + "step": 879 + }, + { + "epoch": 0.08227640754434408, + "loss": 1.6979612112045288, + "loss_ce": 0.003625293727964163, + "loss_iou": 0.71875, + "loss_num": 0.05126953125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 58221740, + "step": 879 + }, + { + "epoch": 0.0823700098282398, + "grad_norm": 35.58506774902344, + "learning_rate": 5e-05, + "loss": 1.7944, + "num_input_tokens_seen": 58288656, + "step": 880 + }, + { + "epoch": 0.0823700098282398, + "loss": 1.6057441234588623, + "loss_ce": 0.00418162252753973, + "loss_iou": 0.7265625, + "loss_num": 0.0302734375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 58288656, + "step": 880 + }, + { + "epoch": 0.08246361211213554, + "grad_norm": 18.107791900634766, + "learning_rate": 5e-05, + "loss": 1.855, + "num_input_tokens_seen": 58355404, + "step": 881 + }, + { + "epoch": 0.08246361211213554, + "loss": 1.9610812664031982, + "loss_ce": 0.0030734376050531864, + "loss_iou": 0.89453125, + "loss_num": 0.03466796875, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 58355404, + "step": 881 + }, + { + "epoch": 0.08255721439603127, + "grad_norm": 15.651595115661621, + "learning_rate": 5e-05, + "loss": 1.5946, + "num_input_tokens_seen": 58421224, + "step": 882 + }, + { + "epoch": 0.08255721439603127, + "loss": 1.7618749141693115, + "loss_ce": 0.003085812320932746, + "loss_iou": 0.7578125, + "loss_num": 0.0478515625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 58421224, + "step": 882 + }, + { + "epoch": 0.08265081667992699, + "grad_norm": 32.5485725402832, + "learning_rate": 5e-05, + "loss": 1.7482, + "num_input_tokens_seen": 58487880, + "step": 883 + }, + { + "epoch": 0.08265081667992699, + "loss": 1.725155234336853, + "loss_ce": 0.003475519362837076, + "loss_iou": 0.7734375, + "loss_num": 0.034912109375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 58487880, + "step": 883 + }, + { + "epoch": 0.08274441896382272, + "grad_norm": 14.286800384521484, + "learning_rate": 5e-05, + "loss": 1.7444, + "num_input_tokens_seen": 58555228, + "step": 884 + }, + { + "epoch": 0.08274441896382272, + "loss": 1.6388722658157349, + "loss_ce": 0.002397665288299322, + "loss_iou": 0.6953125, + "loss_num": 0.04931640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 58555228, + "step": 884 + }, + { + "epoch": 0.08283802124771844, + "grad_norm": 15.21658706665039, + "learning_rate": 5e-05, + "loss": 1.5943, + "num_input_tokens_seen": 58620780, + "step": 885 + }, + { + "epoch": 0.08283802124771844, + "loss": 1.4916024208068848, + "loss_ce": 0.005762574728578329, + "loss_iou": 0.6640625, + "loss_num": 0.031982421875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 58620780, + "step": 885 + }, + { + "epoch": 0.08293162353161417, + "grad_norm": 31.50017738342285, + "learning_rate": 5e-05, + "loss": 1.5109, + "num_input_tokens_seen": 58687504, + "step": 886 + }, + { + "epoch": 0.08293162353161417, + "loss": 1.4624536037445068, + "loss_ce": 0.0029809139668941498, + "loss_iou": 0.6640625, + "loss_num": 0.0255126953125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 58687504, + "step": 886 + }, + { + "epoch": 0.0830252258155099, + "grad_norm": 17.916427612304688, + "learning_rate": 5e-05, + "loss": 1.8547, + "num_input_tokens_seen": 58753604, + "step": 887 + }, + { + "epoch": 0.0830252258155099, + "loss": 1.8118348121643066, + "loss_ce": 0.0022645373828709126, + "loss_iou": 0.7578125, + "loss_num": 0.05908203125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 58753604, + "step": 887 + }, + { + "epoch": 0.08311882809940563, + "grad_norm": 27.366714477539062, + "learning_rate": 5e-05, + "loss": 2.0535, + "num_input_tokens_seen": 58819832, + "step": 888 + }, + { + "epoch": 0.08311882809940563, + "loss": 2.1956310272216797, + "loss_ce": 0.004224792122840881, + "loss_iou": 0.96484375, + "loss_num": 0.05322265625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 58819832, + "step": 888 + }, + { + "epoch": 0.08321243038330135, + "grad_norm": 15.780597686767578, + "learning_rate": 5e-05, + "loss": 1.7807, + "num_input_tokens_seen": 58885640, + "step": 889 + }, + { + "epoch": 0.08321243038330135, + "loss": 1.7835116386413574, + "loss_ce": 0.0022615769412368536, + "loss_iou": 0.765625, + "loss_num": 0.04931640625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 58885640, + "step": 889 + }, + { + "epoch": 0.08330603266719708, + "grad_norm": 10.075730323791504, + "learning_rate": 5e-05, + "loss": 1.4639, + "num_input_tokens_seen": 58952504, + "step": 890 + }, + { + "epoch": 0.08330603266719708, + "loss": 1.5905461311340332, + "loss_ce": 0.005585219245404005, + "loss_iou": 0.62890625, + "loss_num": 0.06494140625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 58952504, + "step": 890 + }, + { + "epoch": 0.0833996349510928, + "grad_norm": 45.0782585144043, + "learning_rate": 5e-05, + "loss": 1.3896, + "num_input_tokens_seen": 59019296, + "step": 891 + }, + { + "epoch": 0.0833996349510928, + "loss": 1.3953213691711426, + "loss_ce": 0.0012783545535057783, + "loss_iou": 0.60546875, + "loss_num": 0.036376953125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 59019296, + "step": 891 + }, + { + "epoch": 0.08349323723498854, + "grad_norm": 33.37999725341797, + "learning_rate": 5e-05, + "loss": 1.8417, + "num_input_tokens_seen": 59085368, + "step": 892 + }, + { + "epoch": 0.08349323723498854, + "loss": 1.6494685411453247, + "loss_ce": 0.003960754722356796, + "loss_iou": 0.76953125, + "loss_num": 0.021484375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 59085368, + "step": 892 + }, + { + "epoch": 0.08358683951888426, + "grad_norm": 24.009674072265625, + "learning_rate": 5e-05, + "loss": 1.9718, + "num_input_tokens_seen": 59151100, + "step": 893 + }, + { + "epoch": 0.08358683951888426, + "loss": 2.168516159057617, + "loss_ce": 0.004453645087778568, + "loss_iou": 0.9375, + "loss_num": 0.05810546875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 59151100, + "step": 893 + }, + { + "epoch": 0.08368044180277999, + "grad_norm": 12.443723678588867, + "learning_rate": 5e-05, + "loss": 1.4969, + "num_input_tokens_seen": 59217480, + "step": 894 + }, + { + "epoch": 0.08368044180277999, + "loss": 1.652637243270874, + "loss_ce": 0.0022466834634542465, + "loss_iou": 0.72265625, + "loss_num": 0.041015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 59217480, + "step": 894 + }, + { + "epoch": 0.08377404408667571, + "grad_norm": 11.240812301635742, + "learning_rate": 5e-05, + "loss": 1.5245, + "num_input_tokens_seen": 59282928, + "step": 895 + }, + { + "epoch": 0.08377404408667571, + "loss": 1.4794633388519287, + "loss_ce": 0.00778358755633235, + "loss_iou": 0.578125, + "loss_num": 0.06298828125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 59282928, + "step": 895 + }, + { + "epoch": 0.08386764637057144, + "grad_norm": 14.911636352539062, + "learning_rate": 5e-05, + "loss": 1.6399, + "num_input_tokens_seen": 59348696, + "step": 896 + }, + { + "epoch": 0.08386764637057144, + "loss": 1.6345075368881226, + "loss_ce": 0.0026716054417192936, + "loss_iou": 0.671875, + "loss_num": 0.058349609375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 59348696, + "step": 896 + }, + { + "epoch": 0.08396124865446716, + "grad_norm": 20.957368850708008, + "learning_rate": 5e-05, + "loss": 1.5178, + "num_input_tokens_seen": 59415700, + "step": 897 + }, + { + "epoch": 0.08396124865446716, + "loss": 1.464629054069519, + "loss_ce": 0.0017384872771799564, + "loss_iou": 0.6484375, + "loss_num": 0.033203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 59415700, + "step": 897 + }, + { + "epoch": 0.0840548509383629, + "grad_norm": 8.092466354370117, + "learning_rate": 5e-05, + "loss": 1.4732, + "num_input_tokens_seen": 59481892, + "step": 898 + }, + { + "epoch": 0.0840548509383629, + "loss": 1.374036431312561, + "loss_ce": 0.009778676554560661, + "loss_iou": 0.5703125, + "loss_num": 0.044189453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 59481892, + "step": 898 + }, + { + "epoch": 0.08414845322225863, + "grad_norm": 12.05978775024414, + "learning_rate": 5e-05, + "loss": 1.5063, + "num_input_tokens_seen": 59548916, + "step": 899 + }, + { + "epoch": 0.08414845322225863, + "loss": 1.3426138162612915, + "loss_ce": 0.005211510695517063, + "loss_iou": 0.56640625, + "loss_num": 0.041015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 59548916, + "step": 899 + }, + { + "epoch": 0.08424205550615435, + "grad_norm": 17.22496223449707, + "learning_rate": 5e-05, + "loss": 1.7005, + "num_input_tokens_seen": 59615852, + "step": 900 + }, + { + "epoch": 0.08424205550615435, + "loss": 1.6572275161743164, + "loss_ce": 0.006836886517703533, + "loss_iou": 0.734375, + "loss_num": 0.03662109375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 59615852, + "step": 900 + }, + { + "epoch": 0.08433565779005008, + "grad_norm": 18.689083099365234, + "learning_rate": 5e-05, + "loss": 1.6872, + "num_input_tokens_seen": 59683120, + "step": 901 + }, + { + "epoch": 0.08433565779005008, + "loss": 1.8009494543075562, + "loss_ce": 0.007980726659297943, + "loss_iou": 0.76171875, + "loss_num": 0.05419921875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 59683120, + "step": 901 + }, + { + "epoch": 0.0844292600739458, + "grad_norm": 14.570134162902832, + "learning_rate": 5e-05, + "loss": 1.4228, + "num_input_tokens_seen": 59749288, + "step": 902 + }, + { + "epoch": 0.0844292600739458, + "loss": 1.4552466869354248, + "loss_ce": 0.0044409530237317085, + "loss_iou": 0.64453125, + "loss_num": 0.032470703125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 59749288, + "step": 902 + }, + { + "epoch": 0.08452286235784152, + "grad_norm": 27.418418884277344, + "learning_rate": 5e-05, + "loss": 1.5693, + "num_input_tokens_seen": 59815208, + "step": 903 + }, + { + "epoch": 0.08452286235784152, + "loss": 1.429563045501709, + "loss_ce": 0.005246618762612343, + "loss_iou": 0.62109375, + "loss_num": 0.0361328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 59815208, + "step": 903 + }, + { + "epoch": 0.08461646464173726, + "grad_norm": 14.06216812133789, + "learning_rate": 5e-05, + "loss": 2.0079, + "num_input_tokens_seen": 59882632, + "step": 904 + }, + { + "epoch": 0.08461646464173726, + "loss": 1.9677444696426392, + "loss_ce": 0.006806934252381325, + "loss_iou": 0.84765625, + "loss_num": 0.052734375, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 59882632, + "step": 904 + }, + { + "epoch": 0.08471006692563299, + "grad_norm": 13.0679292678833, + "learning_rate": 5e-05, + "loss": 1.6538, + "num_input_tokens_seen": 59948612, + "step": 905 + }, + { + "epoch": 0.08471006692563299, + "loss": 1.3045681715011597, + "loss_ce": 0.001986362971365452, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 59948612, + "step": 905 + }, + { + "epoch": 0.08480366920952871, + "grad_norm": 25.379179000854492, + "learning_rate": 5e-05, + "loss": 1.6384, + "num_input_tokens_seen": 60014432, + "step": 906 + }, + { + "epoch": 0.08480366920952871, + "loss": 1.6766581535339355, + "loss_ce": 0.007712891325354576, + "loss_iou": 0.71875, + "loss_num": 0.0458984375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 60014432, + "step": 906 + }, + { + "epoch": 0.08489727149342444, + "grad_norm": 18.72802734375, + "learning_rate": 5e-05, + "loss": 1.8672, + "num_input_tokens_seen": 60081776, + "step": 907 + }, + { + "epoch": 0.08489727149342444, + "loss": 1.750260591506958, + "loss_ce": 0.0022137737832963467, + "loss_iou": 0.734375, + "loss_num": 0.055908203125, + "loss_xval": 1.75, + "num_input_tokens_seen": 60081776, + "step": 907 + }, + { + "epoch": 0.08499087377732016, + "grad_norm": 12.83232593536377, + "learning_rate": 5e-05, + "loss": 1.5873, + "num_input_tokens_seen": 60147888, + "step": 908 + }, + { + "epoch": 0.08499087377732016, + "loss": 1.647781252861023, + "loss_ce": 0.0056913672015070915, + "loss_iou": 0.6640625, + "loss_num": 0.0625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 60147888, + "step": 908 + }, + { + "epoch": 0.0850844760612159, + "grad_norm": 23.512590408325195, + "learning_rate": 5e-05, + "loss": 1.5675, + "num_input_tokens_seen": 60214828, + "step": 909 + }, + { + "epoch": 0.0850844760612159, + "loss": 1.4222242832183838, + "loss_ce": 0.0023023297544568777, + "loss_iou": 0.5546875, + "loss_num": 0.0625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 60214828, + "step": 909 + }, + { + "epoch": 0.08517807834511162, + "grad_norm": 23.48229217529297, + "learning_rate": 5e-05, + "loss": 1.5639, + "num_input_tokens_seen": 60281228, + "step": 910 + }, + { + "epoch": 0.08517807834511162, + "loss": 1.6129997968673706, + "loss_ce": 0.0036248769611120224, + "loss_iou": 0.71484375, + "loss_num": 0.036376953125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 60281228, + "step": 910 + }, + { + "epoch": 0.08527168062900735, + "grad_norm": 15.640399932861328, + "learning_rate": 5e-05, + "loss": 1.8778, + "num_input_tokens_seen": 60347096, + "step": 911 + }, + { + "epoch": 0.08527168062900735, + "loss": 1.9476490020751953, + "loss_ce": 0.0042894878424704075, + "loss_iou": 0.8515625, + "loss_num": 0.0478515625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 60347096, + "step": 911 + }, + { + "epoch": 0.08536528291290307, + "grad_norm": 8.834310531616211, + "learning_rate": 5e-05, + "loss": 1.5887, + "num_input_tokens_seen": 60414356, + "step": 912 + }, + { + "epoch": 0.08536528291290307, + "loss": 1.7419757843017578, + "loss_ce": 0.005647665821015835, + "loss_iou": 0.7578125, + "loss_num": 0.044189453125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 60414356, + "step": 912 + }, + { + "epoch": 0.0854588851967988, + "grad_norm": 23.853418350219727, + "learning_rate": 5e-05, + "loss": 1.622, + "num_input_tokens_seen": 60480536, + "step": 913 + }, + { + "epoch": 0.0854588851967988, + "loss": 1.509779930114746, + "loss_ce": 0.002943947445601225, + "loss_iou": 0.67578125, + "loss_num": 0.030517578125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 60480536, + "step": 913 + }, + { + "epoch": 0.08555248748069452, + "grad_norm": 44.07422637939453, + "learning_rate": 5e-05, + "loss": 2.0665, + "num_input_tokens_seen": 60546960, + "step": 914 + }, + { + "epoch": 0.08555248748069452, + "loss": 2.2943153381347656, + "loss_ce": 0.00720581691712141, + "loss_iou": 0.96484375, + "loss_num": 0.072265625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 60546960, + "step": 914 + }, + { + "epoch": 0.08564608976459026, + "grad_norm": 16.99718475341797, + "learning_rate": 5e-05, + "loss": 1.6176, + "num_input_tokens_seen": 60614616, + "step": 915 + }, + { + "epoch": 0.08564608976459026, + "loss": 1.5280823707580566, + "loss_ce": 0.005133206490427256, + "loss_iou": 0.64453125, + "loss_num": 0.04638671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 60614616, + "step": 915 + }, + { + "epoch": 0.08573969204848599, + "grad_norm": 25.052688598632812, + "learning_rate": 5e-05, + "loss": 1.9113, + "num_input_tokens_seen": 60681420, + "step": 916 + }, + { + "epoch": 0.08573969204848599, + "loss": 2.1192517280578613, + "loss_ce": 0.004994058981537819, + "loss_iou": 0.89453125, + "loss_num": 0.0654296875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 60681420, + "step": 916 + }, + { + "epoch": 0.08583329433238171, + "grad_norm": 15.865578651428223, + "learning_rate": 5e-05, + "loss": 1.9126, + "num_input_tokens_seen": 60747748, + "step": 917 + }, + { + "epoch": 0.08583329433238171, + "loss": 2.0023865699768066, + "loss_ce": 0.006292684003710747, + "loss_iou": 0.84375, + "loss_num": 0.0625, + "loss_xval": 2.0, + "num_input_tokens_seen": 60747748, + "step": 917 + }, + { + "epoch": 0.08592689661627743, + "grad_norm": 15.136137008666992, + "learning_rate": 5e-05, + "loss": 1.3747, + "num_input_tokens_seen": 60812176, + "step": 918 + }, + { + "epoch": 0.08592689661627743, + "loss": 1.3903110027313232, + "loss_ce": 0.003012371016666293, + "loss_iou": 0.5703125, + "loss_num": 0.050048828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 60812176, + "step": 918 + }, + { + "epoch": 0.08602049890017316, + "grad_norm": 30.088048934936523, + "learning_rate": 5e-05, + "loss": 1.4225, + "num_input_tokens_seen": 60878408, + "step": 919 + }, + { + "epoch": 0.08602049890017316, + "loss": 1.393389105796814, + "loss_ce": 0.003496528370305896, + "loss_iou": 0.6171875, + "loss_num": 0.031982421875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 60878408, + "step": 919 + }, + { + "epoch": 0.0861141011840689, + "grad_norm": 16.893909454345703, + "learning_rate": 5e-05, + "loss": 1.6646, + "num_input_tokens_seen": 60944844, + "step": 920 + }, + { + "epoch": 0.0861141011840689, + "loss": 1.6034700870513916, + "loss_ce": 0.004349029157310724, + "loss_iou": 0.703125, + "loss_num": 0.039306640625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 60944844, + "step": 920 + }, + { + "epoch": 0.08620770346796462, + "grad_norm": 15.906914710998535, + "learning_rate": 5e-05, + "loss": 1.6203, + "num_input_tokens_seen": 61011520, + "step": 921 + }, + { + "epoch": 0.08620770346796462, + "loss": 1.7279441356658936, + "loss_ce": 0.003334770444780588, + "loss_iou": 0.76171875, + "loss_num": 0.04052734375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 61011520, + "step": 921 + }, + { + "epoch": 0.08630130575186035, + "grad_norm": 22.44422721862793, + "learning_rate": 5e-05, + "loss": 1.487, + "num_input_tokens_seen": 61077408, + "step": 922 + }, + { + "epoch": 0.08630130575186035, + "loss": 1.4773905277252197, + "loss_ce": 0.0027811103500425816, + "loss_iou": 0.671875, + "loss_num": 0.0269775390625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 61077408, + "step": 922 + }, + { + "epoch": 0.08639490803575607, + "grad_norm": 15.368884086608887, + "learning_rate": 5e-05, + "loss": 1.1949, + "num_input_tokens_seen": 61141920, + "step": 923 + }, + { + "epoch": 0.08639490803575607, + "loss": 1.0608628988265991, + "loss_ce": 0.005839703604578972, + "loss_iou": 0.4453125, + "loss_num": 0.033203125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 61141920, + "step": 923 + }, + { + "epoch": 0.0864885103196518, + "grad_norm": 22.95160484313965, + "learning_rate": 5e-05, + "loss": 1.5344, + "num_input_tokens_seen": 61208808, + "step": 924 + }, + { + "epoch": 0.0864885103196518, + "loss": 1.5811108350753784, + "loss_ce": 0.003962422721087933, + "loss_iou": 0.67578125, + "loss_num": 0.045654296875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 61208808, + "step": 924 + }, + { + "epoch": 0.08658211260354752, + "grad_norm": 16.989505767822266, + "learning_rate": 5e-05, + "loss": 1.8756, + "num_input_tokens_seen": 61274632, + "step": 925 + }, + { + "epoch": 0.08658211260354752, + "loss": 1.8655545711517334, + "loss_ce": 0.008132727816700935, + "loss_iou": 0.77734375, + "loss_num": 0.060546875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 61274632, + "step": 925 + }, + { + "epoch": 0.08667571488744326, + "grad_norm": 12.435086250305176, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 61340672, + "step": 926 + }, + { + "epoch": 0.08667571488744326, + "loss": 1.4617154598236084, + "loss_ce": 0.003707642201334238, + "loss_iou": 0.640625, + "loss_num": 0.035888671875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 61340672, + "step": 926 + }, + { + "epoch": 0.08676931717133898, + "grad_norm": 13.720226287841797, + "learning_rate": 5e-05, + "loss": 1.6492, + "num_input_tokens_seen": 61407436, + "step": 927 + }, + { + "epoch": 0.08676931717133898, + "loss": 1.6545886993408203, + "loss_ce": 0.005174613557755947, + "loss_iou": 0.703125, + "loss_num": 0.048583984375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 61407436, + "step": 927 + }, + { + "epoch": 0.08686291945523471, + "grad_norm": 19.40408706665039, + "learning_rate": 5e-05, + "loss": 1.5196, + "num_input_tokens_seen": 61474000, + "step": 928 + }, + { + "epoch": 0.08686291945523471, + "loss": 1.5394935607910156, + "loss_ce": 0.0014076820807531476, + "loss_iou": 0.6796875, + "loss_num": 0.034912109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 61474000, + "step": 928 + }, + { + "epoch": 0.08695652173913043, + "grad_norm": 40.76962661743164, + "learning_rate": 5e-05, + "loss": 1.67, + "num_input_tokens_seen": 61540732, + "step": 929 + }, + { + "epoch": 0.08695652173913043, + "loss": 1.6722123622894287, + "loss_ce": 0.006196821108460426, + "loss_iou": 0.71875, + "loss_num": 0.0458984375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 61540732, + "step": 929 + }, + { + "epoch": 0.08705012402302616, + "grad_norm": 14.666232109069824, + "learning_rate": 5e-05, + "loss": 2.0639, + "num_input_tokens_seen": 61607380, + "step": 930 + }, + { + "epoch": 0.08705012402302616, + "loss": 2.0115692615509033, + "loss_ce": 0.005709872581064701, + "loss_iou": 0.86328125, + "loss_num": 0.055908203125, + "loss_xval": 2.0, + "num_input_tokens_seen": 61607380, + "step": 930 + }, + { + "epoch": 0.0871437263069219, + "grad_norm": 13.070978164672852, + "learning_rate": 5e-05, + "loss": 1.6374, + "num_input_tokens_seen": 61673452, + "step": 931 + }, + { + "epoch": 0.0871437263069219, + "loss": 1.5387829542160034, + "loss_ce": 0.003626648336648941, + "loss_iou": 0.625, + "loss_num": 0.05712890625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 61673452, + "step": 931 + }, + { + "epoch": 0.08723732859081762, + "grad_norm": 16.796812057495117, + "learning_rate": 5e-05, + "loss": 1.7552, + "num_input_tokens_seen": 61740540, + "step": 932 + }, + { + "epoch": 0.08723732859081762, + "loss": 1.6623833179473877, + "loss_ce": 0.007109887897968292, + "loss_iou": 0.703125, + "loss_num": 0.049560546875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 61740540, + "step": 932 + }, + { + "epoch": 0.08733093087471334, + "grad_norm": 23.889570236206055, + "learning_rate": 5e-05, + "loss": 1.5407, + "num_input_tokens_seen": 61807348, + "step": 933 + }, + { + "epoch": 0.08733093087471334, + "loss": 1.667421817779541, + "loss_ce": 0.0038475480396300554, + "loss_iou": 0.71875, + "loss_num": 0.0458984375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 61807348, + "step": 933 + }, + { + "epoch": 0.08742453315860907, + "grad_norm": 14.975662231445312, + "learning_rate": 5e-05, + "loss": 1.6892, + "num_input_tokens_seen": 61875108, + "step": 934 + }, + { + "epoch": 0.08742453315860907, + "loss": 1.7929887771606445, + "loss_ce": 0.006855999119579792, + "loss_iou": 0.8046875, + "loss_num": 0.034423828125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 61875108, + "step": 934 + }, + { + "epoch": 0.0875181354425048, + "grad_norm": 11.098994255065918, + "learning_rate": 5e-05, + "loss": 1.6709, + "num_input_tokens_seen": 61942064, + "step": 935 + }, + { + "epoch": 0.0875181354425048, + "loss": 1.6905457973480225, + "loss_ce": 0.002557536819949746, + "loss_iou": 0.74609375, + "loss_num": 0.038330078125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 61942064, + "step": 935 + }, + { + "epoch": 0.08761173772640052, + "grad_norm": 14.541089057922363, + "learning_rate": 5e-05, + "loss": 1.3169, + "num_input_tokens_seen": 62007472, + "step": 936 + }, + { + "epoch": 0.08761173772640052, + "loss": 1.4198613166809082, + "loss_ce": 0.007324597332626581, + "loss_iou": 0.5859375, + "loss_num": 0.047607421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 62007472, + "step": 936 + }, + { + "epoch": 0.08770534001029626, + "grad_norm": 16.678556442260742, + "learning_rate": 5e-05, + "loss": 1.4454, + "num_input_tokens_seen": 62074592, + "step": 937 + }, + { + "epoch": 0.08770534001029626, + "loss": 1.5979841947555542, + "loss_ce": 0.004234226420521736, + "loss_iou": 0.7109375, + "loss_num": 0.033447265625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 62074592, + "step": 937 + }, + { + "epoch": 0.08779894229419198, + "grad_norm": 28.974525451660156, + "learning_rate": 5e-05, + "loss": 1.7548, + "num_input_tokens_seen": 62141012, + "step": 938 + }, + { + "epoch": 0.08779894229419198, + "loss": 1.825490951538086, + "loss_ce": 0.0022488520480692387, + "loss_iou": 0.796875, + "loss_num": 0.046875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 62141012, + "step": 938 + }, + { + "epoch": 0.0878925445780877, + "grad_norm": 39.65151596069336, + "learning_rate": 5e-05, + "loss": 1.8679, + "num_input_tokens_seen": 62205744, + "step": 939 + }, + { + "epoch": 0.0878925445780877, + "loss": 1.9288134574890137, + "loss_ce": 0.0059618037194013596, + "loss_iou": 0.8359375, + "loss_num": 0.05078125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 62205744, + "step": 939 + }, + { + "epoch": 0.08798614686198343, + "grad_norm": 11.894693374633789, + "learning_rate": 5e-05, + "loss": 1.4746, + "num_input_tokens_seen": 62271900, + "step": 940 + }, + { + "epoch": 0.08798614686198343, + "loss": 1.4833482503890991, + "loss_ce": 0.0043443311005830765, + "loss_iou": 0.66015625, + "loss_num": 0.031982421875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 62271900, + "step": 940 + }, + { + "epoch": 0.08807974914587915, + "grad_norm": 18.46121597290039, + "learning_rate": 5e-05, + "loss": 1.368, + "num_input_tokens_seen": 62338956, + "step": 941 + }, + { + "epoch": 0.08807974914587915, + "loss": 1.403991937637329, + "loss_ce": 0.0021365319844335318, + "loss_iou": 0.60546875, + "loss_num": 0.038818359375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 62338956, + "step": 941 + }, + { + "epoch": 0.0881733514297749, + "grad_norm": 15.685604095458984, + "learning_rate": 5e-05, + "loss": 1.5076, + "num_input_tokens_seen": 62405004, + "step": 942 + }, + { + "epoch": 0.0881733514297749, + "loss": 1.4380909204483032, + "loss_ce": 0.0035205574240535498, + "loss_iou": 0.62890625, + "loss_num": 0.03515625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 62405004, + "step": 942 + }, + { + "epoch": 0.08826695371367062, + "grad_norm": 70.2530288696289, + "learning_rate": 5e-05, + "loss": 1.5796, + "num_input_tokens_seen": 62470268, + "step": 943 + }, + { + "epoch": 0.08826695371367062, + "loss": 1.508975863456726, + "loss_ce": 0.005557904951274395, + "loss_iou": 0.625, + "loss_num": 0.05029296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 62470268, + "step": 943 + }, + { + "epoch": 0.08836055599756634, + "grad_norm": 18.887065887451172, + "learning_rate": 5e-05, + "loss": 1.8409, + "num_input_tokens_seen": 62535964, + "step": 944 + }, + { + "epoch": 0.08836055599756634, + "loss": 1.9452190399169922, + "loss_ce": 0.0038127328734844923, + "loss_iou": 0.86328125, + "loss_num": 0.043212890625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 62535964, + "step": 944 + }, + { + "epoch": 0.08845415828146207, + "grad_norm": 15.448012351989746, + "learning_rate": 5e-05, + "loss": 1.7827, + "num_input_tokens_seen": 62601912, + "step": 945 + }, + { + "epoch": 0.08845415828146207, + "loss": 1.796706199645996, + "loss_ce": 0.0030049309134483337, + "loss_iou": 0.765625, + "loss_num": 0.052734375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 62601912, + "step": 945 + }, + { + "epoch": 0.08854776056535779, + "grad_norm": 16.356338500976562, + "learning_rate": 5e-05, + "loss": 1.559, + "num_input_tokens_seen": 62668316, + "step": 946 + }, + { + "epoch": 0.08854776056535779, + "loss": 1.63395357131958, + "loss_ce": 0.005047284997999668, + "loss_iou": 0.734375, + "loss_num": 0.032470703125, + "loss_xval": 1.625, + "num_input_tokens_seen": 62668316, + "step": 946 + }, + { + "epoch": 0.08864136284925352, + "grad_norm": 29.35332489013672, + "learning_rate": 5e-05, + "loss": 1.6491, + "num_input_tokens_seen": 62734160, + "step": 947 + }, + { + "epoch": 0.08864136284925352, + "loss": 1.5638450384140015, + "loss_ce": 0.003603259800001979, + "loss_iou": 0.67578125, + "loss_num": 0.041259765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 62734160, + "step": 947 + }, + { + "epoch": 0.08873496513314925, + "grad_norm": 14.649731636047363, + "learning_rate": 5e-05, + "loss": 1.9831, + "num_input_tokens_seen": 62800212, + "step": 948 + }, + { + "epoch": 0.08873496513314925, + "loss": 2.0474257469177246, + "loss_ce": 0.0034804297611117363, + "loss_iou": 0.87890625, + "loss_num": 0.05712890625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 62800212, + "step": 948 + }, + { + "epoch": 0.08882856741704498, + "grad_norm": 17.539445877075195, + "learning_rate": 5e-05, + "loss": 1.3548, + "num_input_tokens_seen": 62865900, + "step": 949 + }, + { + "epoch": 0.08882856741704498, + "loss": 0.9725180864334106, + "loss_ce": 0.004164813086390495, + "loss_iou": 0.41015625, + "loss_num": 0.0299072265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 62865900, + "step": 949 + }, + { + "epoch": 0.0889221697009407, + "grad_norm": 13.716097831726074, + "learning_rate": 5e-05, + "loss": 1.6504, + "num_input_tokens_seen": 62931220, + "step": 950 + }, + { + "epoch": 0.0889221697009407, + "loss": 1.743227481842041, + "loss_ce": 0.0068993838503956795, + "loss_iou": 0.703125, + "loss_num": 0.0654296875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 62931220, + "step": 950 + }, + { + "epoch": 0.08901577198483643, + "grad_norm": 13.76308536529541, + "learning_rate": 5e-05, + "loss": 1.5302, + "num_input_tokens_seen": 62997712, + "step": 951 + }, + { + "epoch": 0.08901577198483643, + "loss": 1.5223302841186523, + "loss_ce": 0.005423550494015217, + "loss_iou": 0.63671875, + "loss_num": 0.048583984375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 62997712, + "step": 951 + }, + { + "epoch": 0.08910937426873215, + "grad_norm": 30.656652450561523, + "learning_rate": 5e-05, + "loss": 1.6095, + "num_input_tokens_seen": 63063788, + "step": 952 + }, + { + "epoch": 0.08910937426873215, + "loss": 1.8086628913879395, + "loss_ce": 0.006905019748955965, + "loss_iou": 0.796875, + "loss_num": 0.041015625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 63063788, + "step": 952 + }, + { + "epoch": 0.08920297655262788, + "grad_norm": 12.013118743896484, + "learning_rate": 5e-05, + "loss": 1.8896, + "num_input_tokens_seen": 63129584, + "step": 953 + }, + { + "epoch": 0.08920297655262788, + "loss": 1.8647828102111816, + "loss_ce": 0.005529878661036491, + "loss_iou": 0.765625, + "loss_num": 0.06591796875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 63129584, + "step": 953 + }, + { + "epoch": 0.08929657883652362, + "grad_norm": 17.934505462646484, + "learning_rate": 5e-05, + "loss": 1.8149, + "num_input_tokens_seen": 63196748, + "step": 954 + }, + { + "epoch": 0.08929657883652362, + "loss": 1.9400184154510498, + "loss_ce": 0.0064246766269207, + "loss_iou": 0.8203125, + "loss_num": 0.05859375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 63196748, + "step": 954 + }, + { + "epoch": 0.08939018112041934, + "grad_norm": 17.4960880279541, + "learning_rate": 5e-05, + "loss": 1.7313, + "num_input_tokens_seen": 63262920, + "step": 955 + }, + { + "epoch": 0.08939018112041934, + "loss": 1.6163601875305176, + "loss_ce": 0.002346484223380685, + "loss_iou": 0.66796875, + "loss_num": 0.055419921875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 63262920, + "step": 955 + }, + { + "epoch": 0.08948378340431506, + "grad_norm": 23.754989624023438, + "learning_rate": 5e-05, + "loss": 1.7231, + "num_input_tokens_seen": 63330032, + "step": 956 + }, + { + "epoch": 0.08948378340431506, + "loss": 1.6572893857955933, + "loss_ce": 0.004945650231093168, + "loss_iou": 0.734375, + "loss_num": 0.03759765625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 63330032, + "step": 956 + }, + { + "epoch": 0.08957738568821079, + "grad_norm": 21.422574996948242, + "learning_rate": 5e-05, + "loss": 1.6317, + "num_input_tokens_seen": 63396576, + "step": 957 + }, + { + "epoch": 0.08957738568821079, + "loss": 1.9376003742218018, + "loss_ce": 0.0020536172669380903, + "loss_iou": 0.83203125, + "loss_num": 0.054931640625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 63396576, + "step": 957 + }, + { + "epoch": 0.08967098797210651, + "grad_norm": 22.355945587158203, + "learning_rate": 5e-05, + "loss": 1.912, + "num_input_tokens_seen": 63463616, + "step": 958 + }, + { + "epoch": 0.08967098797210651, + "loss": 2.114009380340576, + "loss_ce": 0.00463454844430089, + "loss_iou": 0.90625, + "loss_num": 0.059814453125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 63463616, + "step": 958 + }, + { + "epoch": 0.08976459025600225, + "grad_norm": 18.09412956237793, + "learning_rate": 5e-05, + "loss": 1.5119, + "num_input_tokens_seen": 63530248, + "step": 959 + }, + { + "epoch": 0.08976459025600225, + "loss": 1.5951027870178223, + "loss_ce": 0.008677100762724876, + "loss_iou": 0.640625, + "loss_num": 0.060791015625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 63530248, + "step": 959 + }, + { + "epoch": 0.08985819253989798, + "grad_norm": 20.48488426208496, + "learning_rate": 5e-05, + "loss": 1.6661, + "num_input_tokens_seen": 63596024, + "step": 960 + }, + { + "epoch": 0.08985819253989798, + "loss": 1.8077481985092163, + "loss_ce": 0.005990276113152504, + "loss_iou": 0.76953125, + "loss_num": 0.052734375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 63596024, + "step": 960 + }, + { + "epoch": 0.0899517948237937, + "grad_norm": 17.473188400268555, + "learning_rate": 5e-05, + "loss": 1.5852, + "num_input_tokens_seen": 63662480, + "step": 961 + }, + { + "epoch": 0.0899517948237937, + "loss": 1.464072823524475, + "loss_ce": 0.0070415991358459, + "loss_iou": 0.60546875, + "loss_num": 0.048828125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 63662480, + "step": 961 + }, + { + "epoch": 0.09004539710768943, + "grad_norm": 14.590004920959473, + "learning_rate": 5e-05, + "loss": 1.618, + "num_input_tokens_seen": 63727416, + "step": 962 + }, + { + "epoch": 0.09004539710768943, + "loss": 1.4077198505401611, + "loss_ce": 0.0036671289708465338, + "loss_iou": 0.58984375, + "loss_num": 0.045166015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 63727416, + "step": 962 + }, + { + "epoch": 0.09013899939158515, + "grad_norm": 21.898130416870117, + "learning_rate": 5e-05, + "loss": 1.5763, + "num_input_tokens_seen": 63794508, + "step": 963 + }, + { + "epoch": 0.09013899939158515, + "loss": 1.5762133598327637, + "loss_ce": 0.003947695717215538, + "loss_iou": 0.70703125, + "loss_num": 0.031494140625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 63794508, + "step": 963 + }, + { + "epoch": 0.09023260167548088, + "grad_norm": 69.45911407470703, + "learning_rate": 5e-05, + "loss": 1.9389, + "num_input_tokens_seen": 63861312, + "step": 964 + }, + { + "epoch": 0.09023260167548088, + "loss": 2.0766565799713135, + "loss_ce": 0.0063440739177167416, + "loss_iou": 0.87109375, + "loss_num": 0.0654296875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 63861312, + "step": 964 + }, + { + "epoch": 0.09032620395937661, + "grad_norm": 16.2425537109375, + "learning_rate": 5e-05, + "loss": 1.6971, + "num_input_tokens_seen": 63927640, + "step": 965 + }, + { + "epoch": 0.09032620395937661, + "loss": 1.727881669998169, + "loss_ce": 0.004248844925314188, + "loss_iou": 0.75, + "loss_num": 0.045166015625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 63927640, + "step": 965 + }, + { + "epoch": 0.09041980624327234, + "grad_norm": 28.23118782043457, + "learning_rate": 5e-05, + "loss": 1.535, + "num_input_tokens_seen": 63993164, + "step": 966 + }, + { + "epoch": 0.09041980624327234, + "loss": 1.6622929573059082, + "loss_ce": 0.006042903289198875, + "loss_iou": 0.75, + "loss_num": 0.031005859375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 63993164, + "step": 966 + }, + { + "epoch": 0.09051340852716806, + "grad_norm": 14.857102394104004, + "learning_rate": 5e-05, + "loss": 1.86, + "num_input_tokens_seen": 64058972, + "step": 967 + }, + { + "epoch": 0.09051340852716806, + "loss": 1.811227560043335, + "loss_ce": 0.0014131104107946157, + "loss_iou": 0.76953125, + "loss_num": 0.053466796875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 64058972, + "step": 967 + }, + { + "epoch": 0.09060701081106379, + "grad_norm": 19.822324752807617, + "learning_rate": 5e-05, + "loss": 1.6783, + "num_input_tokens_seen": 64125460, + "step": 968 + }, + { + "epoch": 0.09060701081106379, + "loss": 1.712416410446167, + "loss_ce": 0.001479036989621818, + "loss_iou": 0.703125, + "loss_num": 0.061279296875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 64125460, + "step": 968 + }, + { + "epoch": 0.09070061309495951, + "grad_norm": 31.154621124267578, + "learning_rate": 5e-05, + "loss": 1.6444, + "num_input_tokens_seen": 64192432, + "step": 969 + }, + { + "epoch": 0.09070061309495951, + "loss": 1.865007996559143, + "loss_ce": 0.002703359816223383, + "loss_iou": 0.8125, + "loss_num": 0.046630859375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 64192432, + "step": 969 + }, + { + "epoch": 0.09079421537885525, + "grad_norm": 14.616561889648438, + "learning_rate": 5e-05, + "loss": 1.8386, + "num_input_tokens_seen": 64258844, + "step": 970 + }, + { + "epoch": 0.09079421537885525, + "loss": 1.719399094581604, + "loss_ce": 0.004555476363748312, + "loss_iou": 0.75390625, + "loss_num": 0.041259765625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 64258844, + "step": 970 + }, + { + "epoch": 0.09088781766275097, + "grad_norm": 14.370298385620117, + "learning_rate": 5e-05, + "loss": 1.4226, + "num_input_tokens_seen": 64324916, + "step": 971 + }, + { + "epoch": 0.09088781766275097, + "loss": 1.3002216815948486, + "loss_ce": 0.002370052505284548, + "loss_iou": 0.51171875, + "loss_num": 0.055419921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 64324916, + "step": 971 + }, + { + "epoch": 0.0909814199466467, + "grad_norm": 16.304475784301758, + "learning_rate": 5e-05, + "loss": 1.5028, + "num_input_tokens_seen": 64391260, + "step": 972 + }, + { + "epoch": 0.0909814199466467, + "loss": 1.6733556985855103, + "loss_ce": 0.0034338238183408976, + "loss_iou": 0.69921875, + "loss_num": 0.054931640625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 64391260, + "step": 972 + }, + { + "epoch": 0.09107502223054242, + "grad_norm": 16.182010650634766, + "learning_rate": 5e-05, + "loss": 1.8612, + "num_input_tokens_seen": 64458276, + "step": 973 + }, + { + "epoch": 0.09107502223054242, + "loss": 1.7737977504730225, + "loss_ce": 0.004266506526619196, + "loss_iou": 0.75, + "loss_num": 0.053955078125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 64458276, + "step": 973 + }, + { + "epoch": 0.09116862451443815, + "grad_norm": 78.58245086669922, + "learning_rate": 5e-05, + "loss": 1.5493, + "num_input_tokens_seen": 64524384, + "step": 974 + }, + { + "epoch": 0.09116862451443815, + "loss": 1.5430245399475098, + "loss_ce": 0.007868239656090736, + "loss_iou": 0.6796875, + "loss_num": 0.03564453125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 64524384, + "step": 974 + }, + { + "epoch": 0.09126222679833387, + "grad_norm": 28.305130004882812, + "learning_rate": 5e-05, + "loss": 1.5059, + "num_input_tokens_seen": 64590308, + "step": 975 + }, + { + "epoch": 0.09126222679833387, + "loss": 1.351921796798706, + "loss_ce": 0.004998022690415382, + "loss_iou": 0.5703125, + "loss_num": 0.04150390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 64590308, + "step": 975 + }, + { + "epoch": 0.09135582908222961, + "grad_norm": 27.344154357910156, + "learning_rate": 5e-05, + "loss": 1.6304, + "num_input_tokens_seen": 64657676, + "step": 976 + }, + { + "epoch": 0.09135582908222961, + "loss": 1.6344707012176514, + "loss_ce": 0.0045879255048930645, + "loss_iou": 0.7109375, + "loss_num": 0.041015625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 64657676, + "step": 976 + }, + { + "epoch": 0.09144943136612534, + "grad_norm": 17.74469566345215, + "learning_rate": 5e-05, + "loss": 1.8817, + "num_input_tokens_seen": 64724036, + "step": 977 + }, + { + "epoch": 0.09144943136612534, + "loss": 1.943572998046875, + "loss_ce": 0.00412002531811595, + "loss_iou": 0.828125, + "loss_num": 0.05615234375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 64724036, + "step": 977 + }, + { + "epoch": 0.09154303365002106, + "grad_norm": 11.425139427185059, + "learning_rate": 5e-05, + "loss": 1.5377, + "num_input_tokens_seen": 64789396, + "step": 978 + }, + { + "epoch": 0.09154303365002106, + "loss": 1.3548119068145752, + "loss_ce": 0.003005302045494318, + "loss_iou": 0.5859375, + "loss_num": 0.036376953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 64789396, + "step": 978 + }, + { + "epoch": 0.09163663593391679, + "grad_norm": 21.837615966796875, + "learning_rate": 5e-05, + "loss": 1.6378, + "num_input_tokens_seen": 64855472, + "step": 979 + }, + { + "epoch": 0.09163663593391679, + "loss": 1.6065376996994019, + "loss_ce": 0.006684138905256987, + "loss_iou": 0.671875, + "loss_num": 0.0517578125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 64855472, + "step": 979 + }, + { + "epoch": 0.09173023821781251, + "grad_norm": 25.40251922607422, + "learning_rate": 5e-05, + "loss": 1.4749, + "num_input_tokens_seen": 64922676, + "step": 980 + }, + { + "epoch": 0.09173023821781251, + "loss": 1.4515924453735352, + "loss_ce": 0.005303313955664635, + "loss_iou": 0.59375, + "loss_num": 0.051025390625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 64922676, + "step": 980 + }, + { + "epoch": 0.09182384050170825, + "grad_norm": 22.099281311035156, + "learning_rate": 5e-05, + "loss": 1.8676, + "num_input_tokens_seen": 64988632, + "step": 981 + }, + { + "epoch": 0.09182384050170825, + "loss": 2.053835391998291, + "loss_ce": 0.004030559211969376, + "loss_iou": 0.87890625, + "loss_num": 0.057861328125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 64988632, + "step": 981 + }, + { + "epoch": 0.09191744278560397, + "grad_norm": 15.45548152923584, + "learning_rate": 5e-05, + "loss": 1.4767, + "num_input_tokens_seen": 65054192, + "step": 982 + }, + { + "epoch": 0.09191744278560397, + "loss": 1.357534646987915, + "loss_ce": 0.0018217508913949132, + "loss_iou": 0.5390625, + "loss_num": 0.056396484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 65054192, + "step": 982 + }, + { + "epoch": 0.0920110450694997, + "grad_norm": 14.510873794555664, + "learning_rate": 5e-05, + "loss": 1.7271, + "num_input_tokens_seen": 65121220, + "step": 983 + }, + { + "epoch": 0.0920110450694997, + "loss": 1.6353819370269775, + "loss_ce": 0.00256940396502614, + "loss_iou": 0.70703125, + "loss_num": 0.0439453125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 65121220, + "step": 983 + }, + { + "epoch": 0.09210464735339542, + "grad_norm": 22.228652954101562, + "learning_rate": 5e-05, + "loss": 1.6905, + "num_input_tokens_seen": 65188016, + "step": 984 + }, + { + "epoch": 0.09210464735339542, + "loss": 1.6479146480560303, + "loss_ce": 0.0038717109709978104, + "loss_iou": 0.7421875, + "loss_num": 0.0311279296875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 65188016, + "step": 984 + }, + { + "epoch": 0.09219824963729115, + "grad_norm": 20.86760139465332, + "learning_rate": 5e-05, + "loss": 1.7681, + "num_input_tokens_seen": 65253952, + "step": 985 + }, + { + "epoch": 0.09219824963729115, + "loss": 1.8290784358978271, + "loss_ce": 0.002906452864408493, + "loss_iou": 0.75390625, + "loss_num": 0.06298828125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 65253952, + "step": 985 + }, + { + "epoch": 0.09229185192118687, + "grad_norm": 21.462255477905273, + "learning_rate": 5e-05, + "loss": 1.8411, + "num_input_tokens_seen": 65320044, + "step": 986 + }, + { + "epoch": 0.09229185192118687, + "loss": 1.9524469375610352, + "loss_ce": 0.00518146064132452, + "loss_iou": 0.86328125, + "loss_num": 0.043212890625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 65320044, + "step": 986 + }, + { + "epoch": 0.09238545420508261, + "grad_norm": 19.662405014038086, + "learning_rate": 5e-05, + "loss": 1.8031, + "num_input_tokens_seen": 65386520, + "step": 987 + }, + { + "epoch": 0.09238545420508261, + "loss": 1.8032032251358032, + "loss_ce": 0.004375106655061245, + "loss_iou": 0.78515625, + "loss_num": 0.045654296875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 65386520, + "step": 987 + }, + { + "epoch": 0.09247905648897833, + "grad_norm": 30.860292434692383, + "learning_rate": 5e-05, + "loss": 1.5476, + "num_input_tokens_seen": 65453280, + "step": 988 + }, + { + "epoch": 0.09247905648897833, + "loss": 1.5351617336273193, + "loss_ce": 0.006841444410383701, + "loss_iou": 0.62890625, + "loss_num": 0.054443359375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 65453280, + "step": 988 + }, + { + "epoch": 0.09257265877287406, + "grad_norm": 17.08321189880371, + "learning_rate": 5e-05, + "loss": 1.5348, + "num_input_tokens_seen": 65519240, + "step": 989 + }, + { + "epoch": 0.09257265877287406, + "loss": 1.6281179189682007, + "loss_ce": 0.005559375509619713, + "loss_iou": 0.71875, + "loss_num": 0.037841796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 65519240, + "step": 989 + }, + { + "epoch": 0.09266626105676978, + "grad_norm": 27.292034149169922, + "learning_rate": 5e-05, + "loss": 1.5467, + "num_input_tokens_seen": 65584904, + "step": 990 + }, + { + "epoch": 0.09266626105676978, + "loss": 1.4985840320587158, + "loss_ce": 0.003466897876933217, + "loss_iou": 0.609375, + "loss_num": 0.056396484375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 65584904, + "step": 990 + }, + { + "epoch": 0.09275986334066551, + "grad_norm": 16.059263229370117, + "learning_rate": 5e-05, + "loss": 2.0579, + "num_input_tokens_seen": 65650572, + "step": 991 + }, + { + "epoch": 0.09275986334066551, + "loss": 2.122138500213623, + "loss_ce": 0.01007784716784954, + "loss_iou": 0.859375, + "loss_num": 0.07861328125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 65650572, + "step": 991 + }, + { + "epoch": 0.09285346562456125, + "grad_norm": 23.014415740966797, + "learning_rate": 5e-05, + "loss": 1.5944, + "num_input_tokens_seen": 65716568, + "step": 992 + }, + { + "epoch": 0.09285346562456125, + "loss": 1.6441433429718018, + "loss_ce": 0.004494882188737392, + "loss_iou": 0.6953125, + "loss_num": 0.0498046875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 65716568, + "step": 992 + }, + { + "epoch": 0.09294706790845697, + "grad_norm": 15.229471206665039, + "learning_rate": 5e-05, + "loss": 1.4162, + "num_input_tokens_seen": 65782944, + "step": 993 + }, + { + "epoch": 0.09294706790845697, + "loss": 1.480794906616211, + "loss_ce": 0.013021371327340603, + "loss_iou": 0.6015625, + "loss_num": 0.05322265625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 65782944, + "step": 993 + }, + { + "epoch": 0.0930406701923527, + "grad_norm": 14.0590238571167, + "learning_rate": 5e-05, + "loss": 1.8864, + "num_input_tokens_seen": 65848924, + "step": 994 + }, + { + "epoch": 0.0930406701923527, + "loss": 1.8647971153259277, + "loss_ce": 0.008351797237992287, + "loss_iou": 0.81640625, + "loss_num": 0.045654296875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 65848924, + "step": 994 + }, + { + "epoch": 0.09313427247624842, + "grad_norm": 18.439361572265625, + "learning_rate": 5e-05, + "loss": 1.4629, + "num_input_tokens_seen": 65914444, + "step": 995 + }, + { + "epoch": 0.09313427247624842, + "loss": 1.2895300388336182, + "loss_ce": 0.002695329487323761, + "loss_iou": 0.5234375, + "loss_num": 0.0478515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 65914444, + "step": 995 + }, + { + "epoch": 0.09322787476014414, + "grad_norm": 21.675338745117188, + "learning_rate": 5e-05, + "loss": 1.5215, + "num_input_tokens_seen": 65980580, + "step": 996 + }, + { + "epoch": 0.09322787476014414, + "loss": 1.4937338829040527, + "loss_ce": 0.005452618934214115, + "loss_iou": 0.65625, + "loss_num": 0.0361328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 65980580, + "step": 996 + }, + { + "epoch": 0.09332147704403987, + "grad_norm": 23.317962646484375, + "learning_rate": 5e-05, + "loss": 1.5736, + "num_input_tokens_seen": 66046996, + "step": 997 + }, + { + "epoch": 0.09332147704403987, + "loss": 1.5401999950408936, + "loss_ce": 0.0030905466992408037, + "loss_iou": 0.67578125, + "loss_num": 0.036865234375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 66046996, + "step": 997 + }, + { + "epoch": 0.09341507932793561, + "grad_norm": 17.197994232177734, + "learning_rate": 5e-05, + "loss": 1.8094, + "num_input_tokens_seen": 66113444, + "step": 998 + }, + { + "epoch": 0.09341507932793561, + "loss": 1.8027560710906982, + "loss_ce": 0.003439765190705657, + "loss_iou": 0.75, + "loss_num": 0.06005859375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 66113444, + "step": 998 + }, + { + "epoch": 0.09350868161183133, + "grad_norm": 16.98614501953125, + "learning_rate": 5e-05, + "loss": 1.6693, + "num_input_tokens_seen": 66178728, + "step": 999 + }, + { + "epoch": 0.09350868161183133, + "loss": 1.5198097229003906, + "loss_ce": 0.007114410400390625, + "loss_iou": 0.58984375, + "loss_num": 0.06591796875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 66178728, + "step": 999 + }, + { + "epoch": 0.09360228389572706, + "grad_norm": 16.32314682006836, + "learning_rate": 5e-05, + "loss": 1.5448, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "eval_seeclick_CIoU": 0.03466115053743124, + "eval_seeclick_GIoU": 0.012590939877554774, + "eval_seeclick_IoU": 0.18651100993156433, + "eval_seeclick_MAE_all": 0.1619100198149681, + "eval_seeclick_MAE_h": 0.1370251551270485, + "eval_seeclick_MAE_w": 0.13397662341594696, + "eval_seeclick_MAE_x_boxes": 0.2869945168495178, + "eval_seeclick_MAE_y_boxes": 0.0982743352651596, + "eval_seeclick_NUM_probability": 0.9988741278648376, + "eval_seeclick_inside_bbox": 0.3739583343267441, + "eval_seeclick_loss": 2.791210174560547, + "eval_seeclick_loss_ce": 0.01485899556428194, + "eval_seeclick_loss_iou": 0.987060546875, + "eval_seeclick_loss_num": 0.163177490234375, + "eval_seeclick_loss_xval": 2.7919921875, + "eval_seeclick_runtime": 61.7115, + "eval_seeclick_samples_per_second": 0.762, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "eval_icons_CIoU": -0.1584959700703621, + "eval_icons_GIoU": -0.08978072926402092, + "eval_icons_IoU": 0.06438343971967697, + "eval_icons_MAE_all": 0.20530376583337784, + "eval_icons_MAE_h": 0.26106757670640945, + "eval_icons_MAE_w": 0.18720195442438126, + "eval_icons_MAE_x_boxes": 0.1660088151693344, + "eval_icons_MAE_y_boxes": 0.09818163141608238, + "eval_icons_NUM_probability": 0.9957078993320465, + "eval_icons_inside_bbox": 0.1180555559694767, + "eval_icons_loss": 3.2430856227874756, + "eval_icons_loss_ce": 0.0015207797405309975, + "eval_icons_loss_iou": 1.114501953125, + "eval_icons_loss_num": 0.2159423828125, + "eval_icons_loss_xval": 3.310546875, + "eval_icons_runtime": 63.9957, + "eval_icons_samples_per_second": 0.781, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "eval_screenspot_CIoU": -0.03200580676396688, + "eval_screenspot_GIoU": -0.029922740999609232, + "eval_screenspot_IoU": 0.16735319793224335, + "eval_screenspot_MAE_all": 0.19810542464256287, + "eval_screenspot_MAE_h": 0.19106672704219818, + "eval_screenspot_MAE_w": 0.18381081521511078, + "eval_screenspot_MAE_x_boxes": 0.24538678924242655, + "eval_screenspot_MAE_y_boxes": 0.11639802157878876, + "eval_screenspot_NUM_probability": 0.9960946639378866, + "eval_screenspot_inside_bbox": 0.3937500019868215, + "eval_screenspot_loss": 3.08681058883667, + "eval_screenspot_loss_ce": 0.012565320047239462, + "eval_screenspot_loss_iou": 1.0498046875, + "eval_screenspot_loss_num": 0.20220947265625, + "eval_screenspot_loss_xval": 3.1106770833333335, + "eval_screenspot_runtime": 119.1968, + "eval_screenspot_samples_per_second": 0.747, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "eval_compot_CIoU": -0.0126183507964015, + "eval_compot_GIoU": -0.0007607042789459229, + "eval_compot_IoU": 0.1446545273065567, + "eval_compot_MAE_all": 0.14552883058786392, + "eval_compot_MAE_h": 0.07847815565764904, + "eval_compot_MAE_w": 0.15001967549324036, + "eval_compot_MAE_x_boxes": 0.15651856362819672, + "eval_compot_MAE_y_boxes": 0.12546472623944283, + "eval_compot_NUM_probability": 0.9959275722503662, + "eval_compot_inside_bbox": 0.3194444477558136, + "eval_compot_loss": 2.76811146736145, + "eval_compot_loss_ce": 0.00711844814941287, + "eval_compot_loss_iou": 1.0244140625, + "eval_compot_loss_num": 0.15118408203125, + "eval_compot_loss_xval": 2.8056640625, + "eval_compot_runtime": 70.0337, + "eval_compot_samples_per_second": 0.714, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "eval_custom_ui_MAE_all": 0.14354324340820312, + "eval_custom_ui_MAE_x": 0.10222059860825539, + "eval_custom_ui_MAE_y": 0.18486589938402176, + "eval_custom_ui_NUM_probability": 0.9998778402805328, + "eval_custom_ui_loss": 0.7289488315582275, + "eval_custom_ui_loss_ce": 0.02813083864748478, + "eval_custom_ui_loss_num": 0.15020751953125, + "eval_custom_ui_loss_xval": 0.75146484375, + "eval_custom_ui_runtime": 51.587, + "eval_custom_ui_samples_per_second": 0.969, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09360228389572706, + "loss": 0.8095696568489075, + "loss_ce": 0.03222590684890747, + "loss_iou": 0.0, + "loss_num": 0.1552734375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 66245348, + "step": 1000 + }, + { + "epoch": 0.09369588617962278, + "grad_norm": 22.253944396972656, + "learning_rate": 5e-05, + "loss": 1.4737, + "num_input_tokens_seen": 66310660, + "step": 1001 + }, + { + "epoch": 0.09369588617962278, + "loss": 1.4513487815856934, + "loss_ce": 0.004083187319338322, + "loss_iou": 0.6171875, + "loss_num": 0.04296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 66310660, + "step": 1001 + }, + { + "epoch": 0.0937894884635185, + "grad_norm": 13.835038185119629, + "learning_rate": 5e-05, + "loss": 1.7459, + "num_input_tokens_seen": 66376868, + "step": 1002 + }, + { + "epoch": 0.0937894884635185, + "loss": 1.898911476135254, + "loss_ce": 0.004380341153591871, + "loss_iou": 0.83203125, + "loss_num": 0.04541015625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 66376868, + "step": 1002 + }, + { + "epoch": 0.09388309074741423, + "grad_norm": 15.812870979309082, + "learning_rate": 5e-05, + "loss": 1.3805, + "num_input_tokens_seen": 66443344, + "step": 1003 + }, + { + "epoch": 0.09388309074741423, + "loss": 1.3290150165557861, + "loss_ce": 0.0028431350365281105, + "loss_iou": 0.6015625, + "loss_num": 0.0238037109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 66443344, + "step": 1003 + }, + { + "epoch": 0.09397669303130997, + "grad_norm": 38.81221389770508, + "learning_rate": 5e-05, + "loss": 1.72, + "num_input_tokens_seen": 66509552, + "step": 1004 + }, + { + "epoch": 0.09397669303130997, + "loss": 1.5138416290283203, + "loss_ce": 0.0021228091791272163, + "loss_iou": 0.6953125, + "loss_num": 0.0235595703125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 66509552, + "step": 1004 + }, + { + "epoch": 0.0940702953152057, + "grad_norm": 12.50516414642334, + "learning_rate": 5e-05, + "loss": 1.7761, + "num_input_tokens_seen": 66575992, + "step": 1005 + }, + { + "epoch": 0.0940702953152057, + "loss": 1.655672550201416, + "loss_ce": 0.0037561198696494102, + "loss_iou": 0.69921875, + "loss_num": 0.051025390625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 66575992, + "step": 1005 + }, + { + "epoch": 0.09416389759910142, + "grad_norm": 16.19294548034668, + "learning_rate": 5e-05, + "loss": 1.8414, + "num_input_tokens_seen": 66643048, + "step": 1006 + }, + { + "epoch": 0.09416389759910142, + "loss": 1.7385921478271484, + "loss_ce": 0.003240494290366769, + "loss_iou": 0.765625, + "loss_num": 0.04052734375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 66643048, + "step": 1006 + }, + { + "epoch": 0.09425749988299714, + "grad_norm": 98.92877197265625, + "learning_rate": 5e-05, + "loss": 1.7013, + "num_input_tokens_seen": 66708988, + "step": 1007 + }, + { + "epoch": 0.09425749988299714, + "loss": 1.6064229011535645, + "loss_ce": 0.00779008911922574, + "loss_iou": 0.6640625, + "loss_num": 0.053955078125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 66708988, + "step": 1007 + }, + { + "epoch": 0.09435110216689287, + "grad_norm": 25.077068328857422, + "learning_rate": 5e-05, + "loss": 1.651, + "num_input_tokens_seen": 66774484, + "step": 1008 + }, + { + "epoch": 0.09435110216689287, + "loss": 1.7158145904541016, + "loss_ce": 0.002924002707004547, + "loss_iou": 0.7578125, + "loss_num": 0.039306640625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 66774484, + "step": 1008 + }, + { + "epoch": 0.0944447044507886, + "grad_norm": 12.136792182922363, + "learning_rate": 5e-05, + "loss": 1.8415, + "num_input_tokens_seen": 66840160, + "step": 1009 + }, + { + "epoch": 0.0944447044507886, + "loss": 1.9895738363265991, + "loss_ce": 0.004222306422889233, + "loss_iou": 0.83203125, + "loss_num": 0.06396484375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 66840160, + "step": 1009 + }, + { + "epoch": 0.09453830673468433, + "grad_norm": 18.081323623657227, + "learning_rate": 5e-05, + "loss": 1.4525, + "num_input_tokens_seen": 66906144, + "step": 1010 + }, + { + "epoch": 0.09453830673468433, + "loss": 1.523803949356079, + "loss_ce": 0.004272680729627609, + "loss_iou": 0.62890625, + "loss_num": 0.052978515625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 66906144, + "step": 1010 + }, + { + "epoch": 0.09463190901858005, + "grad_norm": 22.84792137145996, + "learning_rate": 5e-05, + "loss": 1.4413, + "num_input_tokens_seen": 66972860, + "step": 1011 + }, + { + "epoch": 0.09463190901858005, + "loss": 1.464858055114746, + "loss_ce": 0.007826870307326317, + "loss_iou": 0.62109375, + "loss_num": 0.043212890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 66972860, + "step": 1011 + }, + { + "epoch": 0.09472551130247578, + "grad_norm": 18.576574325561523, + "learning_rate": 5e-05, + "loss": 2.0495, + "num_input_tokens_seen": 67039584, + "step": 1012 + }, + { + "epoch": 0.09472551130247578, + "loss": 1.9049363136291504, + "loss_ce": 0.002592559903860092, + "loss_iou": 0.81640625, + "loss_num": 0.0537109375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 67039584, + "step": 1012 + }, + { + "epoch": 0.0948191135863715, + "grad_norm": 16.99166488647461, + "learning_rate": 5e-05, + "loss": 1.5516, + "num_input_tokens_seen": 67106348, + "step": 1013 + }, + { + "epoch": 0.0948191135863715, + "loss": 1.682436466217041, + "loss_ce": 0.007631717249751091, + "loss_iou": 0.70703125, + "loss_num": 0.0517578125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 67106348, + "step": 1013 + }, + { + "epoch": 0.09491271587026723, + "grad_norm": 29.552988052368164, + "learning_rate": 5e-05, + "loss": 1.372, + "num_input_tokens_seen": 67172900, + "step": 1014 + }, + { + "epoch": 0.09491271587026723, + "loss": 1.5988967418670654, + "loss_ce": 0.005146836396306753, + "loss_iou": 0.6953125, + "loss_num": 0.040283203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 67172900, + "step": 1014 + }, + { + "epoch": 0.09500631815416297, + "grad_norm": 16.46036720275879, + "learning_rate": 5e-05, + "loss": 1.5688, + "num_input_tokens_seen": 67239208, + "step": 1015 + }, + { + "epoch": 0.09500631815416297, + "loss": 1.612764835357666, + "loss_ce": 0.0024134009145200253, + "loss_iou": 0.734375, + "loss_num": 0.0279541015625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 67239208, + "step": 1015 + }, + { + "epoch": 0.09509992043805869, + "grad_norm": 25.499847412109375, + "learning_rate": 5e-05, + "loss": 1.4352, + "num_input_tokens_seen": 67305596, + "step": 1016 + }, + { + "epoch": 0.09509992043805869, + "loss": 1.2837343215942383, + "loss_ce": 0.0063906023278832436, + "loss_iou": 0.55859375, + "loss_num": 0.0322265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 67305596, + "step": 1016 + }, + { + "epoch": 0.09519352272195442, + "grad_norm": 29.628381729125977, + "learning_rate": 5e-05, + "loss": 1.8951, + "num_input_tokens_seen": 67371004, + "step": 1017 + }, + { + "epoch": 0.09519352272195442, + "loss": 1.9087352752685547, + "loss_ce": 0.0034617616329342127, + "loss_iou": 0.83984375, + "loss_num": 0.04541015625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 67371004, + "step": 1017 + }, + { + "epoch": 0.09528712500585014, + "grad_norm": 23.89756202697754, + "learning_rate": 5e-05, + "loss": 1.4198, + "num_input_tokens_seen": 67437148, + "step": 1018 + }, + { + "epoch": 0.09528712500585014, + "loss": 1.547162413597107, + "loss_ce": 0.0061468021012842655, + "loss_iou": 0.671875, + "loss_num": 0.0390625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 67437148, + "step": 1018 + }, + { + "epoch": 0.09538072728974586, + "grad_norm": 17.92390251159668, + "learning_rate": 5e-05, + "loss": 1.69, + "num_input_tokens_seen": 67502872, + "step": 1019 + }, + { + "epoch": 0.09538072728974586, + "loss": 1.6546707153320312, + "loss_ce": 0.004280084278434515, + "loss_iou": 0.74609375, + "loss_num": 0.0322265625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 67502872, + "step": 1019 + }, + { + "epoch": 0.0954743295736416, + "grad_norm": 16.330259323120117, + "learning_rate": 5e-05, + "loss": 1.6055, + "num_input_tokens_seen": 67569084, + "step": 1020 + }, + { + "epoch": 0.0954743295736416, + "loss": 1.5637905597686768, + "loss_ce": 0.007149962708353996, + "loss_iou": 0.6875, + "loss_num": 0.036376953125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 67569084, + "step": 1020 + }, + { + "epoch": 0.09556793185753733, + "grad_norm": 26.579259872436523, + "learning_rate": 5e-05, + "loss": 1.8511, + "num_input_tokens_seen": 67635584, + "step": 1021 + }, + { + "epoch": 0.09556793185753733, + "loss": 1.7766225337982178, + "loss_ce": 0.007091335952281952, + "loss_iou": 0.73828125, + "loss_num": 0.05908203125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 67635584, + "step": 1021 + }, + { + "epoch": 0.09566153414143305, + "grad_norm": 16.56151008605957, + "learning_rate": 5e-05, + "loss": 1.9727, + "num_input_tokens_seen": 67703132, + "step": 1022 + }, + { + "epoch": 0.09566153414143305, + "loss": 1.791287899017334, + "loss_ce": 0.003201971994712949, + "loss_iou": 0.78515625, + "loss_num": 0.04296875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 67703132, + "step": 1022 + }, + { + "epoch": 0.09575513642532878, + "grad_norm": 14.52706527709961, + "learning_rate": 5e-05, + "loss": 1.4742, + "num_input_tokens_seen": 67768912, + "step": 1023 + }, + { + "epoch": 0.09575513642532878, + "loss": 1.4676893949508667, + "loss_ce": 0.0028456742875277996, + "loss_iou": 0.6484375, + "loss_num": 0.033203125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 67768912, + "step": 1023 + }, + { + "epoch": 0.0958487387092245, + "grad_norm": 61.90464401245117, + "learning_rate": 5e-05, + "loss": 1.5133, + "num_input_tokens_seen": 67834020, + "step": 1024 + }, + { + "epoch": 0.0958487387092245, + "loss": 1.5269737243652344, + "loss_ce": 0.0019493482541292906, + "loss_iou": 0.68359375, + "loss_num": 0.03076171875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 67834020, + "step": 1024 + }, + { + "epoch": 0.09594234099312023, + "grad_norm": 12.636026382446289, + "learning_rate": 5e-05, + "loss": 1.988, + "num_input_tokens_seen": 67900200, + "step": 1025 + }, + { + "epoch": 0.09594234099312023, + "loss": 2.130302906036377, + "loss_ce": 0.002373168943449855, + "loss_iou": 0.8984375, + "loss_num": 0.06640625, + "loss_xval": 2.125, + "num_input_tokens_seen": 67900200, + "step": 1025 + }, + { + "epoch": 0.09603594327701596, + "grad_norm": 17.263626098632812, + "learning_rate": 5e-05, + "loss": 1.6959, + "num_input_tokens_seen": 67965520, + "step": 1026 + }, + { + "epoch": 0.09603594327701596, + "loss": 1.4326584339141846, + "loss_ce": 0.0063889846205711365, + "loss_iou": 0.6015625, + "loss_num": 0.04541015625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 67965520, + "step": 1026 + }, + { + "epoch": 0.09612954556091169, + "grad_norm": 29.37456512451172, + "learning_rate": 5e-05, + "loss": 1.5141, + "num_input_tokens_seen": 68031500, + "step": 1027 + }, + { + "epoch": 0.09612954556091169, + "loss": 1.3183093070983887, + "loss_ce": 0.0039782398380339146, + "loss_iou": 0.55859375, + "loss_num": 0.0390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 68031500, + "step": 1027 + }, + { + "epoch": 0.09622314784480741, + "grad_norm": 16.778858184814453, + "learning_rate": 5e-05, + "loss": 1.6878, + "num_input_tokens_seen": 68098236, + "step": 1028 + }, + { + "epoch": 0.09622314784480741, + "loss": 1.533916711807251, + "loss_ce": 0.0036432542838156223, + "loss_iou": 0.63671875, + "loss_num": 0.051513671875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 68098236, + "step": 1028 + }, + { + "epoch": 0.09631675012870314, + "grad_norm": 69.1749267578125, + "learning_rate": 5e-05, + "loss": 1.5744, + "num_input_tokens_seen": 68164000, + "step": 1029 + }, + { + "epoch": 0.09631675012870314, + "loss": 1.693444013595581, + "loss_ce": 0.00447924854233861, + "loss_iou": 0.734375, + "loss_num": 0.044921875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 68164000, + "step": 1029 + }, + { + "epoch": 0.09641035241259886, + "grad_norm": 15.717203140258789, + "learning_rate": 5e-05, + "loss": 1.7683, + "num_input_tokens_seen": 68230360, + "step": 1030 + }, + { + "epoch": 0.09641035241259886, + "loss": 1.7388529777526855, + "loss_ce": 0.006431044079363346, + "loss_iou": 0.80078125, + "loss_num": 0.0272216796875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 68230360, + "step": 1030 + }, + { + "epoch": 0.0965039546964946, + "grad_norm": 15.72042179107666, + "learning_rate": 5e-05, + "loss": 1.6475, + "num_input_tokens_seen": 68296604, + "step": 1031 + }, + { + "epoch": 0.0965039546964946, + "loss": 1.5739208459854126, + "loss_ce": 0.0036083075683563948, + "loss_iou": 0.671875, + "loss_num": 0.04443359375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 68296604, + "step": 1031 + }, + { + "epoch": 0.09659755698039033, + "grad_norm": 14.753561973571777, + "learning_rate": 5e-05, + "loss": 1.5485, + "num_input_tokens_seen": 68362820, + "step": 1032 + }, + { + "epoch": 0.09659755698039033, + "loss": 1.3991491794586182, + "loss_ce": 0.011453813873231411, + "loss_iou": 0.5859375, + "loss_num": 0.043212890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 68362820, + "step": 1032 + }, + { + "epoch": 0.09669115926428605, + "grad_norm": 21.489585876464844, + "learning_rate": 5e-05, + "loss": 1.5655, + "num_input_tokens_seen": 68428880, + "step": 1033 + }, + { + "epoch": 0.09669115926428605, + "loss": 1.5785999298095703, + "loss_ce": 0.00438120448961854, + "loss_iou": 0.6640625, + "loss_num": 0.049560546875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 68428880, + "step": 1033 + }, + { + "epoch": 0.09678476154818177, + "grad_norm": 24.65547752380371, + "learning_rate": 5e-05, + "loss": 1.5685, + "num_input_tokens_seen": 68495700, + "step": 1034 + }, + { + "epoch": 0.09678476154818177, + "loss": 1.5529437065124512, + "loss_ce": 0.006068655289709568, + "loss_iou": 0.671875, + "loss_num": 0.041259765625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 68495700, + "step": 1034 + }, + { + "epoch": 0.0968783638320775, + "grad_norm": 22.68483543395996, + "learning_rate": 5e-05, + "loss": 1.5734, + "num_input_tokens_seen": 68561484, + "step": 1035 + }, + { + "epoch": 0.0968783638320775, + "loss": 1.5440155267715454, + "loss_ce": 0.0028777297120541334, + "loss_iou": 0.65625, + "loss_num": 0.04541015625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 68561484, + "step": 1035 + }, + { + "epoch": 0.09697196611597322, + "grad_norm": 18.365602493286133, + "learning_rate": 5e-05, + "loss": 1.71, + "num_input_tokens_seen": 68628544, + "step": 1036 + }, + { + "epoch": 0.09697196611597322, + "loss": 1.703133225440979, + "loss_ce": 0.000984803307801485, + "loss_iou": 0.76953125, + "loss_num": 0.0322265625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 68628544, + "step": 1036 + }, + { + "epoch": 0.09706556839986896, + "grad_norm": 29.792518615722656, + "learning_rate": 5e-05, + "loss": 1.5854, + "num_input_tokens_seen": 68695448, + "step": 1037 + }, + { + "epoch": 0.09706556839986896, + "loss": 1.714836835861206, + "loss_ce": 0.006829025689512491, + "loss_iou": 0.734375, + "loss_num": 0.048583984375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 68695448, + "step": 1037 + }, + { + "epoch": 0.09715917068376469, + "grad_norm": 16.52050018310547, + "learning_rate": 5e-05, + "loss": 1.6163, + "num_input_tokens_seen": 68761552, + "step": 1038 + }, + { + "epoch": 0.09715917068376469, + "loss": 1.5071520805358887, + "loss_ce": 0.005687330383807421, + "loss_iou": 0.6796875, + "loss_num": 0.0291748046875, + "loss_xval": 1.5, + "num_input_tokens_seen": 68761552, + "step": 1038 + }, + { + "epoch": 0.09725277296766041, + "grad_norm": 15.1870756149292, + "learning_rate": 5e-05, + "loss": 1.6116, + "num_input_tokens_seen": 68827932, + "step": 1039 + }, + { + "epoch": 0.09725277296766041, + "loss": 1.667130947113037, + "loss_ce": 0.005998114589601755, + "loss_iou": 0.68359375, + "loss_num": 0.058349609375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 68827932, + "step": 1039 + }, + { + "epoch": 0.09734637525155614, + "grad_norm": 18.27654266357422, + "learning_rate": 5e-05, + "loss": 1.7088, + "num_input_tokens_seen": 68894556, + "step": 1040 + }, + { + "epoch": 0.09734637525155614, + "loss": 1.6159359216690063, + "loss_ce": 0.0036311917938292027, + "loss_iou": 0.6640625, + "loss_num": 0.056884765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 68894556, + "step": 1040 + }, + { + "epoch": 0.09743997753545186, + "grad_norm": 32.86941909790039, + "learning_rate": 5e-05, + "loss": 1.5318, + "num_input_tokens_seen": 68959732, + "step": 1041 + }, + { + "epoch": 0.09743997753545186, + "loss": 1.702520489692688, + "loss_ce": 0.005254846997559071, + "loss_iou": 0.734375, + "loss_num": 0.04541015625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 68959732, + "step": 1041 + }, + { + "epoch": 0.0975335798193476, + "grad_norm": 13.069723129272461, + "learning_rate": 5e-05, + "loss": 1.8367, + "num_input_tokens_seen": 69026096, + "step": 1042 + }, + { + "epoch": 0.0975335798193476, + "loss": 2.051509380340576, + "loss_ce": 0.001704599242657423, + "loss_iou": 0.890625, + "loss_num": 0.052734375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 69026096, + "step": 1042 + }, + { + "epoch": 0.09762718210324332, + "grad_norm": 20.314245223999023, + "learning_rate": 5e-05, + "loss": 1.4561, + "num_input_tokens_seen": 69093532, + "step": 1043 + }, + { + "epoch": 0.09762718210324332, + "loss": 1.501004695892334, + "loss_ce": 0.006375749129801989, + "loss_iou": 0.67578125, + "loss_num": 0.0294189453125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 69093532, + "step": 1043 + }, + { + "epoch": 0.09772078438713905, + "grad_norm": 24.271562576293945, + "learning_rate": 5e-05, + "loss": 1.6173, + "num_input_tokens_seen": 69159004, + "step": 1044 + }, + { + "epoch": 0.09772078438713905, + "loss": 1.531834602355957, + "loss_ce": 0.005467416252940893, + "loss_iou": 0.6484375, + "loss_num": 0.0458984375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 69159004, + "step": 1044 + }, + { + "epoch": 0.09781438667103477, + "grad_norm": 18.976776123046875, + "learning_rate": 5e-05, + "loss": 1.799, + "num_input_tokens_seen": 69225708, + "step": 1045 + }, + { + "epoch": 0.09781438667103477, + "loss": 1.7624338865280151, + "loss_ce": 0.00559794157743454, + "loss_iou": 0.7734375, + "loss_num": 0.0419921875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 69225708, + "step": 1045 + }, + { + "epoch": 0.0979079889549305, + "grad_norm": 12.693644523620605, + "learning_rate": 5e-05, + "loss": 1.6855, + "num_input_tokens_seen": 69291768, + "step": 1046 + }, + { + "epoch": 0.0979079889549305, + "loss": 1.4835333824157715, + "loss_ce": 0.0020880727097392082, + "loss_iou": 0.66015625, + "loss_num": 0.032470703125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 69291768, + "step": 1046 + }, + { + "epoch": 0.09800159123882622, + "grad_norm": 13.826536178588867, + "learning_rate": 5e-05, + "loss": 1.3811, + "num_input_tokens_seen": 69357984, + "step": 1047 + }, + { + "epoch": 0.09800159123882622, + "loss": 0.954777717590332, + "loss_ce": 0.00214099558070302, + "loss_iou": 0.416015625, + "loss_num": 0.0242919921875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 69357984, + "step": 1047 + }, + { + "epoch": 0.09809519352272196, + "grad_norm": 29.4437313079834, + "learning_rate": 5e-05, + "loss": 1.5258, + "num_input_tokens_seen": 69423936, + "step": 1048 + }, + { + "epoch": 0.09809519352272196, + "loss": 1.5787353515625, + "loss_ce": 0.004516693763434887, + "loss_iou": 0.65625, + "loss_num": 0.05224609375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 69423936, + "step": 1048 + }, + { + "epoch": 0.09818879580661768, + "grad_norm": 37.3914909362793, + "learning_rate": 5e-05, + "loss": 1.5761, + "num_input_tokens_seen": 69489296, + "step": 1049 + }, + { + "epoch": 0.09818879580661768, + "loss": 1.8066282272338867, + "loss_ce": 0.0031614694744348526, + "loss_iou": 0.8125, + "loss_num": 0.03564453125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 69489296, + "step": 1049 + }, + { + "epoch": 0.09828239809051341, + "grad_norm": 218.51792907714844, + "learning_rate": 5e-05, + "loss": 1.8707, + "num_input_tokens_seen": 69555684, + "step": 1050 + }, + { + "epoch": 0.09828239809051341, + "loss": 1.7996981143951416, + "loss_ce": 0.006729439832270145, + "loss_iou": 0.8125, + "loss_num": 0.03369140625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 69555684, + "step": 1050 + }, + { + "epoch": 0.09837600037440913, + "grad_norm": 19.0709228515625, + "learning_rate": 5e-05, + "loss": 1.8382, + "num_input_tokens_seen": 69622460, + "step": 1051 + }, + { + "epoch": 0.09837600037440913, + "loss": 1.8147270679473877, + "loss_ce": 0.006133353337645531, + "loss_iou": 0.78125, + "loss_num": 0.0498046875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 69622460, + "step": 1051 + }, + { + "epoch": 0.09846960265830486, + "grad_norm": 11.224459648132324, + "learning_rate": 5e-05, + "loss": 1.5486, + "num_input_tokens_seen": 69688848, + "step": 1052 + }, + { + "epoch": 0.09846960265830486, + "loss": 1.5167632102966309, + "loss_ce": 0.005044479388743639, + "loss_iou": 0.6484375, + "loss_num": 0.04296875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 69688848, + "step": 1052 + }, + { + "epoch": 0.09856320494220058, + "grad_norm": 28.384679794311523, + "learning_rate": 5e-05, + "loss": 1.631, + "num_input_tokens_seen": 69754772, + "step": 1053 + }, + { + "epoch": 0.09856320494220058, + "loss": 1.6727714538574219, + "loss_ce": 0.006755872629582882, + "loss_iou": 0.74609375, + "loss_num": 0.03564453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 69754772, + "step": 1053 + }, + { + "epoch": 0.09865680722609632, + "grad_norm": 17.086957931518555, + "learning_rate": 5e-05, + "loss": 1.7335, + "num_input_tokens_seen": 69820760, + "step": 1054 + }, + { + "epoch": 0.09865680722609632, + "loss": 1.7270066738128662, + "loss_ce": 0.002397257601842284, + "loss_iou": 0.76953125, + "loss_num": 0.037841796875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 69820760, + "step": 1054 + }, + { + "epoch": 0.09875040950999205, + "grad_norm": 31.31211280822754, + "learning_rate": 5e-05, + "loss": 1.4944, + "num_input_tokens_seen": 69886556, + "step": 1055 + }, + { + "epoch": 0.09875040950999205, + "loss": 1.4780758619308472, + "loss_ce": 0.0074948593974113464, + "loss_iou": 0.56640625, + "loss_num": 0.06787109375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 69886556, + "step": 1055 + }, + { + "epoch": 0.09884401179388777, + "grad_norm": 18.1871337890625, + "learning_rate": 5e-05, + "loss": 1.8154, + "num_input_tokens_seen": 69953852, + "step": 1056 + }, + { + "epoch": 0.09884401179388777, + "loss": 1.6344540119171143, + "loss_ce": 0.004571170080453157, + "loss_iou": 0.703125, + "loss_num": 0.04541015625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 69953852, + "step": 1056 + }, + { + "epoch": 0.0989376140777835, + "grad_norm": 11.456923484802246, + "learning_rate": 5e-05, + "loss": 1.512, + "num_input_tokens_seen": 70020220, + "step": 1057 + }, + { + "epoch": 0.0989376140777835, + "loss": 1.4629371166229248, + "loss_ce": 0.004929323680698872, + "loss_iou": 0.65625, + "loss_num": 0.028564453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 70020220, + "step": 1057 + }, + { + "epoch": 0.09903121636167922, + "grad_norm": 17.7427921295166, + "learning_rate": 5e-05, + "loss": 1.6399, + "num_input_tokens_seen": 70086860, + "step": 1058 + }, + { + "epoch": 0.09903121636167922, + "loss": 1.7626097202301025, + "loss_ce": 0.0038206689059734344, + "loss_iou": 0.73828125, + "loss_num": 0.055908203125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 70086860, + "step": 1058 + }, + { + "epoch": 0.09912481864557496, + "grad_norm": 34.55023193359375, + "learning_rate": 5e-05, + "loss": 1.7016, + "num_input_tokens_seen": 70153744, + "step": 1059 + }, + { + "epoch": 0.09912481864557496, + "loss": 1.7080399990081787, + "loss_ce": 0.0029619333799928427, + "loss_iou": 0.765625, + "loss_num": 0.035400390625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 70153744, + "step": 1059 + }, + { + "epoch": 0.09921842092947068, + "grad_norm": 13.886670112609863, + "learning_rate": 5e-05, + "loss": 1.7789, + "num_input_tokens_seen": 70221132, + "step": 1060 + }, + { + "epoch": 0.09921842092947068, + "loss": 1.9111485481262207, + "loss_ce": 0.006851662881672382, + "loss_iou": 0.8203125, + "loss_num": 0.053466796875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 70221132, + "step": 1060 + }, + { + "epoch": 0.09931202321336641, + "grad_norm": 19.265901565551758, + "learning_rate": 5e-05, + "loss": 1.4447, + "num_input_tokens_seen": 70286868, + "step": 1061 + }, + { + "epoch": 0.09931202321336641, + "loss": 1.4278239011764526, + "loss_ce": 0.003507505403831601, + "loss_iou": 0.59765625, + "loss_num": 0.0458984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 70286868, + "step": 1061 + }, + { + "epoch": 0.09940562549726213, + "grad_norm": 23.00331687927246, + "learning_rate": 5e-05, + "loss": 1.6059, + "num_input_tokens_seen": 70354820, + "step": 1062 + }, + { + "epoch": 0.09940562549726213, + "loss": 1.4556865692138672, + "loss_ce": 0.0035380988847464323, + "loss_iou": 0.60546875, + "loss_num": 0.048583984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 70354820, + "step": 1062 + }, + { + "epoch": 0.09949922778115786, + "grad_norm": 19.529693603515625, + "learning_rate": 5e-05, + "loss": 1.8307, + "num_input_tokens_seen": 70419364, + "step": 1063 + }, + { + "epoch": 0.09949922778115786, + "loss": 2.0091640949249268, + "loss_ce": 0.0033047685865312815, + "loss_iou": 0.86328125, + "loss_num": 0.0556640625, + "loss_xval": 2.0, + "num_input_tokens_seen": 70419364, + "step": 1063 + }, + { + "epoch": 0.09959283006505358, + "grad_norm": 19.776260375976562, + "learning_rate": 5e-05, + "loss": 1.628, + "num_input_tokens_seen": 70485440, + "step": 1064 + }, + { + "epoch": 0.09959283006505358, + "loss": 1.560286045074463, + "loss_ce": 0.004622036591172218, + "loss_iou": 0.6875, + "loss_num": 0.036376953125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 70485440, + "step": 1064 + }, + { + "epoch": 0.09968643234894932, + "grad_norm": 39.927310943603516, + "learning_rate": 5e-05, + "loss": 1.5625, + "num_input_tokens_seen": 70552364, + "step": 1065 + }, + { + "epoch": 0.09968643234894932, + "loss": 1.6188111305236816, + "loss_ce": 0.009436175227165222, + "loss_iou": 0.6875, + "loss_num": 0.047119140625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 70552364, + "step": 1065 + }, + { + "epoch": 0.09978003463284504, + "grad_norm": 15.731878280639648, + "learning_rate": 5e-05, + "loss": 2.0476, + "num_input_tokens_seen": 70618948, + "step": 1066 + }, + { + "epoch": 0.09978003463284504, + "loss": 1.9860522747039795, + "loss_ce": 0.008513141423463821, + "loss_iou": 0.81640625, + "loss_num": 0.068359375, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 70618948, + "step": 1066 + }, + { + "epoch": 0.09987363691674077, + "grad_norm": 16.6303653717041, + "learning_rate": 5e-05, + "loss": 1.5697, + "num_input_tokens_seen": 70685280, + "step": 1067 + }, + { + "epoch": 0.09987363691674077, + "loss": 1.6117045879364014, + "loss_ce": 0.0033061308786273003, + "loss_iou": 0.71875, + "loss_num": 0.03369140625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 70685280, + "step": 1067 + }, + { + "epoch": 0.09996723920063649, + "grad_norm": 32.4035758972168, + "learning_rate": 5e-05, + "loss": 1.4768, + "num_input_tokens_seen": 70751844, + "step": 1068 + }, + { + "epoch": 0.09996723920063649, + "loss": 1.5377182960510254, + "loss_ce": 0.003050293307751417, + "loss_iou": 0.62109375, + "loss_num": 0.058349609375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 70751844, + "step": 1068 + }, + { + "epoch": 0.10006084148453222, + "grad_norm": 16.004093170166016, + "learning_rate": 5e-05, + "loss": 1.7652, + "num_input_tokens_seen": 70817780, + "step": 1069 + }, + { + "epoch": 0.10006084148453222, + "loss": 1.643068790435791, + "loss_ce": 0.0023826994001865387, + "loss_iou": 0.7109375, + "loss_num": 0.04296875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 70817780, + "step": 1069 + }, + { + "epoch": 0.10015444376842796, + "grad_norm": 19.020105361938477, + "learning_rate": 5e-05, + "loss": 1.3034, + "num_input_tokens_seen": 70883888, + "step": 1070 + }, + { + "epoch": 0.10015444376842796, + "loss": 1.3595044612884521, + "loss_ce": 0.002082610735669732, + "loss_iou": 0.5859375, + "loss_num": 0.03662109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 70883888, + "step": 1070 + }, + { + "epoch": 0.10024804605232368, + "grad_norm": 46.259517669677734, + "learning_rate": 5e-05, + "loss": 1.8026, + "num_input_tokens_seen": 70950500, + "step": 1071 + }, + { + "epoch": 0.10024804605232368, + "loss": 1.6754218339920044, + "loss_ce": 0.0035468616988509893, + "loss_iou": 0.71484375, + "loss_num": 0.049072265625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 70950500, + "step": 1071 + }, + { + "epoch": 0.1003416483362194, + "grad_norm": 20.967409133911133, + "learning_rate": 5e-05, + "loss": 1.8289, + "num_input_tokens_seen": 71015636, + "step": 1072 + }, + { + "epoch": 0.1003416483362194, + "loss": 1.8284236192703247, + "loss_ce": 0.004204882774502039, + "loss_iou": 0.7890625, + "loss_num": 0.04833984375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 71015636, + "step": 1072 + }, + { + "epoch": 0.10043525062011513, + "grad_norm": 15.622443199157715, + "learning_rate": 5e-05, + "loss": 1.485, + "num_input_tokens_seen": 71082532, + "step": 1073 + }, + { + "epoch": 0.10043525062011513, + "loss": 1.4984934329986572, + "loss_ce": 0.004841048736125231, + "loss_iou": 0.65234375, + "loss_num": 0.037841796875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 71082532, + "step": 1073 + }, + { + "epoch": 0.10052885290401085, + "grad_norm": 20.51572036743164, + "learning_rate": 5e-05, + "loss": 1.592, + "num_input_tokens_seen": 71148244, + "step": 1074 + }, + { + "epoch": 0.10052885290401085, + "loss": 1.701202392578125, + "loss_ce": 0.004424963146448135, + "loss_iou": 0.7421875, + "loss_num": 0.042236328125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 71148244, + "step": 1074 + }, + { + "epoch": 0.10062245518790658, + "grad_norm": 21.5648250579834, + "learning_rate": 5e-05, + "loss": 1.5863, + "num_input_tokens_seen": 71214616, + "step": 1075 + }, + { + "epoch": 0.10062245518790658, + "loss": 1.763230800628662, + "loss_ce": 0.009324410930275917, + "loss_iou": 0.79296875, + "loss_num": 0.032958984375, + "loss_xval": 1.75, + "num_input_tokens_seen": 71214616, + "step": 1075 + }, + { + "epoch": 0.10071605747180232, + "grad_norm": 28.579771041870117, + "learning_rate": 5e-05, + "loss": 1.4247, + "num_input_tokens_seen": 71280288, + "step": 1076 + }, + { + "epoch": 0.10071605747180232, + "loss": 1.4551047086715698, + "loss_ce": 0.009792180731892586, + "loss_iou": 0.64453125, + "loss_num": 0.0306396484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 71280288, + "step": 1076 + }, + { + "epoch": 0.10080965975569804, + "grad_norm": 18.032018661499023, + "learning_rate": 5e-05, + "loss": 1.824, + "num_input_tokens_seen": 71346528, + "step": 1077 + }, + { + "epoch": 0.10080965975569804, + "loss": 1.9830670356750488, + "loss_ce": 0.007481178268790245, + "loss_iou": 0.80859375, + "loss_num": 0.07177734375, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 71346528, + "step": 1077 + }, + { + "epoch": 0.10090326203959377, + "grad_norm": 21.930185317993164, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 71412284, + "step": 1078 + }, + { + "epoch": 0.10090326203959377, + "loss": 1.3273941278457642, + "loss_ce": 0.001222223974764347, + "loss_iou": 0.59375, + "loss_num": 0.02783203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 71412284, + "step": 1078 + }, + { + "epoch": 0.10099686432348949, + "grad_norm": 20.907901763916016, + "learning_rate": 5e-05, + "loss": 1.4868, + "num_input_tokens_seen": 71478152, + "step": 1079 + }, + { + "epoch": 0.10099686432348949, + "loss": 1.4336016178131104, + "loss_ce": 0.012947340495884418, + "loss_iou": 0.578125, + "loss_num": 0.0537109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 71478152, + "step": 1079 + }, + { + "epoch": 0.10109046660738522, + "grad_norm": 13.136309623718262, + "learning_rate": 5e-05, + "loss": 1.4114, + "num_input_tokens_seen": 71544900, + "step": 1080 + }, + { + "epoch": 0.10109046660738522, + "loss": 1.6138828992843628, + "loss_ce": 0.006949296221137047, + "loss_iou": 0.6640625, + "loss_num": 0.056884765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 71544900, + "step": 1080 + }, + { + "epoch": 0.10118406889128095, + "grad_norm": 78.87405395507812, + "learning_rate": 5e-05, + "loss": 1.6984, + "num_input_tokens_seen": 71610936, + "step": 1081 + }, + { + "epoch": 0.10118406889128095, + "loss": 1.7695459127426147, + "loss_ce": 0.007827112451195717, + "loss_iou": 0.75, + "loss_num": 0.052001953125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 71610936, + "step": 1081 + }, + { + "epoch": 0.10127767117517668, + "grad_norm": 14.12183952331543, + "learning_rate": 5e-05, + "loss": 1.5883, + "num_input_tokens_seen": 71677840, + "step": 1082 + }, + { + "epoch": 0.10127767117517668, + "loss": 1.5694940090179443, + "loss_ce": 0.005285100545734167, + "loss_iou": 0.69921875, + "loss_num": 0.032470703125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 71677840, + "step": 1082 + }, + { + "epoch": 0.1013712734590724, + "grad_norm": 17.401836395263672, + "learning_rate": 5e-05, + "loss": 1.6485, + "num_input_tokens_seen": 71743168, + "step": 1083 + }, + { + "epoch": 0.1013712734590724, + "loss": 1.7952377796173096, + "loss_ce": 0.0071518197655677795, + "loss_iou": 0.74609375, + "loss_num": 0.059326171875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 71743168, + "step": 1083 + }, + { + "epoch": 0.10146487574296813, + "grad_norm": 29.285566329956055, + "learning_rate": 5e-05, + "loss": 1.7169, + "num_input_tokens_seen": 71809468, + "step": 1084 + }, + { + "epoch": 0.10146487574296813, + "loss": 1.5882303714752197, + "loss_ce": 0.006199192255735397, + "loss_iou": 0.69140625, + "loss_num": 0.040283203125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 71809468, + "step": 1084 + }, + { + "epoch": 0.10155847802686385, + "grad_norm": 21.07638168334961, + "learning_rate": 5e-05, + "loss": 1.5334, + "num_input_tokens_seen": 71874744, + "step": 1085 + }, + { + "epoch": 0.10155847802686385, + "loss": 1.4922783374786377, + "loss_ce": 0.004973745439201593, + "loss_iou": 0.66015625, + "loss_num": 0.033935546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 71874744, + "step": 1085 + }, + { + "epoch": 0.10165208031075958, + "grad_norm": 35.0526237487793, + "learning_rate": 5e-05, + "loss": 1.4507, + "num_input_tokens_seen": 71940584, + "step": 1086 + }, + { + "epoch": 0.10165208031075958, + "loss": 1.3636212348937988, + "loss_ce": 0.009617269039154053, + "loss_iou": 0.5625, + "loss_num": 0.045654296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 71940584, + "step": 1086 + }, + { + "epoch": 0.10174568259465532, + "grad_norm": 21.897987365722656, + "learning_rate": 5e-05, + "loss": 1.6272, + "num_input_tokens_seen": 72005496, + "step": 1087 + }, + { + "epoch": 0.10174568259465532, + "loss": 1.599699854850769, + "loss_ce": 0.0035084174014627934, + "loss_iou": 0.65234375, + "loss_num": 0.057861328125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 72005496, + "step": 1087 + }, + { + "epoch": 0.10183928487855104, + "grad_norm": 26.753427505493164, + "learning_rate": 5e-05, + "loss": 1.6557, + "num_input_tokens_seen": 72072488, + "step": 1088 + }, + { + "epoch": 0.10183928487855104, + "loss": 1.7499940395355225, + "loss_ce": 0.007318269927054644, + "loss_iou": 0.73828125, + "loss_num": 0.052001953125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 72072488, + "step": 1088 + }, + { + "epoch": 0.10193288716244676, + "grad_norm": 19.3857479095459, + "learning_rate": 5e-05, + "loss": 1.5798, + "num_input_tokens_seen": 72137396, + "step": 1089 + }, + { + "epoch": 0.10193288716244676, + "loss": 1.4376049041748047, + "loss_ce": 0.001081545022316277, + "loss_iou": 0.625, + "loss_num": 0.037353515625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 72137396, + "step": 1089 + }, + { + "epoch": 0.10202648944634249, + "grad_norm": 10.583497047424316, + "learning_rate": 5e-05, + "loss": 1.3534, + "num_input_tokens_seen": 72203864, + "step": 1090 + }, + { + "epoch": 0.10202648944634249, + "loss": 1.4205148220062256, + "loss_ce": 0.005963992327451706, + "loss_iou": 0.60546875, + "loss_num": 0.04052734375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 72203864, + "step": 1090 + }, + { + "epoch": 0.10212009173023821, + "grad_norm": 30.084596633911133, + "learning_rate": 5e-05, + "loss": 1.5118, + "num_input_tokens_seen": 72270124, + "step": 1091 + }, + { + "epoch": 0.10212009173023821, + "loss": 1.6620097160339355, + "loss_ce": 0.011619108729064465, + "loss_iou": 0.7265625, + "loss_num": 0.0400390625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 72270124, + "step": 1091 + }, + { + "epoch": 0.10221369401413395, + "grad_norm": 36.06584930419922, + "learning_rate": 5e-05, + "loss": 1.4752, + "num_input_tokens_seen": 72336408, + "step": 1092 + }, + { + "epoch": 0.10221369401413395, + "loss": 1.4394707679748535, + "loss_ce": 0.006121148355305195, + "loss_iou": 0.6328125, + "loss_num": 0.03271484375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 72336408, + "step": 1092 + }, + { + "epoch": 0.10230729629802968, + "grad_norm": 13.94032096862793, + "learning_rate": 5e-05, + "loss": 1.7626, + "num_input_tokens_seen": 72399836, + "step": 1093 + }, + { + "epoch": 0.10230729629802968, + "loss": 1.9253475666046143, + "loss_ce": 0.0034724862780421972, + "loss_iou": 0.86328125, + "loss_num": 0.039794921875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 72399836, + "step": 1093 + }, + { + "epoch": 0.1024008985819254, + "grad_norm": 19.62771224975586, + "learning_rate": 5e-05, + "loss": 1.5887, + "num_input_tokens_seen": 72466636, + "step": 1094 + }, + { + "epoch": 0.1024008985819254, + "loss": 1.4443734884262085, + "loss_ce": 0.005042454227805138, + "loss_iou": 0.609375, + "loss_num": 0.04345703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 72466636, + "step": 1094 + }, + { + "epoch": 0.10249450086582113, + "grad_norm": 25.884469985961914, + "learning_rate": 5e-05, + "loss": 1.3426, + "num_input_tokens_seen": 72533320, + "step": 1095 + }, + { + "epoch": 0.10249450086582113, + "loss": 1.1323490142822266, + "loss_ce": 0.0034426813945174217, + "loss_iou": 0.5078125, + "loss_num": 0.023193359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 72533320, + "step": 1095 + }, + { + "epoch": 0.10258810314971685, + "grad_norm": 26.165925979614258, + "learning_rate": 5e-05, + "loss": 1.6928, + "num_input_tokens_seen": 72599888, + "step": 1096 + }, + { + "epoch": 0.10258810314971685, + "loss": 1.6118435859680176, + "loss_ce": 0.005398129113018513, + "loss_iou": 0.703125, + "loss_num": 0.03955078125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 72599888, + "step": 1096 + }, + { + "epoch": 0.10268170543361257, + "grad_norm": 28.40653419494629, + "learning_rate": 5e-05, + "loss": 1.6217, + "num_input_tokens_seen": 72666576, + "step": 1097 + }, + { + "epoch": 0.10268170543361257, + "loss": 1.5436112880706787, + "loss_ce": 0.0033280765637755394, + "loss_iou": 0.65234375, + "loss_num": 0.04736328125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 72666576, + "step": 1097 + }, + { + "epoch": 0.10277530771750831, + "grad_norm": 10.888528823852539, + "learning_rate": 5e-05, + "loss": 1.2432, + "num_input_tokens_seen": 72732152, + "step": 1098 + }, + { + "epoch": 0.10277530771750831, + "loss": 1.4281492233276367, + "loss_ce": 0.0052976300939917564, + "loss_iou": 0.625, + "loss_num": 0.03466796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 72732152, + "step": 1098 + }, + { + "epoch": 0.10286891000140404, + "grad_norm": 24.421236038208008, + "learning_rate": 5e-05, + "loss": 1.4365, + "num_input_tokens_seen": 72797844, + "step": 1099 + }, + { + "epoch": 0.10286891000140404, + "loss": 1.8412877321243286, + "loss_ce": 0.00437368405982852, + "loss_iou": 0.7578125, + "loss_num": 0.0634765625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 72797844, + "step": 1099 + }, + { + "epoch": 0.10296251228529976, + "grad_norm": 46.921199798583984, + "learning_rate": 5e-05, + "loss": 1.4816, + "num_input_tokens_seen": 72863276, + "step": 1100 + }, + { + "epoch": 0.10296251228529976, + "loss": 1.644058108329773, + "loss_ce": 0.008804120123386383, + "loss_iou": 0.67578125, + "loss_num": 0.056884765625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 72863276, + "step": 1100 + }, + { + "epoch": 0.10305611456919549, + "grad_norm": 16.586036682128906, + "learning_rate": 5e-05, + "loss": 1.4811, + "num_input_tokens_seen": 72928792, + "step": 1101 + }, + { + "epoch": 0.10305611456919549, + "loss": 1.5740876197814941, + "loss_ce": 0.0037751158233731985, + "loss_iou": 0.6640625, + "loss_num": 0.04833984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 72928792, + "step": 1101 + }, + { + "epoch": 0.10314971685309121, + "grad_norm": 37.889278411865234, + "learning_rate": 5e-05, + "loss": 1.5914, + "num_input_tokens_seen": 72995832, + "step": 1102 + }, + { + "epoch": 0.10314971685309121, + "loss": 1.3846938610076904, + "loss_ce": 0.00578762823715806, + "loss_iou": 0.6328125, + "loss_num": 0.022705078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 72995832, + "step": 1102 + }, + { + "epoch": 0.10324331913698694, + "grad_norm": 15.632954597473145, + "learning_rate": 5e-05, + "loss": 1.9994, + "num_input_tokens_seen": 73062164, + "step": 1103 + }, + { + "epoch": 0.10324331913698694, + "loss": 1.82685124874115, + "loss_ce": 0.00165596604347229, + "loss_iou": 0.7890625, + "loss_num": 0.05029296875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 73062164, + "step": 1103 + }, + { + "epoch": 0.10333692142088267, + "grad_norm": 29.71272850036621, + "learning_rate": 5e-05, + "loss": 1.5723, + "num_input_tokens_seen": 73128172, + "step": 1104 + }, + { + "epoch": 0.10333692142088267, + "loss": 1.4010519981384277, + "loss_ce": 0.006642777472734451, + "loss_iou": 0.60546875, + "loss_num": 0.037109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 73128172, + "step": 1104 + }, + { + "epoch": 0.1034305237047784, + "grad_norm": 18.238737106323242, + "learning_rate": 5e-05, + "loss": 1.5136, + "num_input_tokens_seen": 73195452, + "step": 1105 + }, + { + "epoch": 0.1034305237047784, + "loss": 1.4810616970062256, + "loss_ce": 0.00156954035628587, + "loss_iou": 0.66015625, + "loss_num": 0.03271484375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 73195452, + "step": 1105 + }, + { + "epoch": 0.10352412598867412, + "grad_norm": 18.990901947021484, + "learning_rate": 5e-05, + "loss": 1.613, + "num_input_tokens_seen": 73262228, + "step": 1106 + }, + { + "epoch": 0.10352412598867412, + "loss": 1.7205805778503418, + "loss_ce": 0.0037837019190192223, + "loss_iou": 0.7578125, + "loss_num": 0.04052734375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 73262228, + "step": 1106 + }, + { + "epoch": 0.10361772827256985, + "grad_norm": 39.51871109008789, + "learning_rate": 5e-05, + "loss": 1.6039, + "num_input_tokens_seen": 73329256, + "step": 1107 + }, + { + "epoch": 0.10361772827256985, + "loss": 1.646957516670227, + "loss_ce": 0.004379414487630129, + "loss_iou": 0.71875, + "loss_num": 0.041259765625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 73329256, + "step": 1107 + }, + { + "epoch": 0.10371133055646557, + "grad_norm": 16.212440490722656, + "learning_rate": 5e-05, + "loss": 1.8582, + "num_input_tokens_seen": 73395804, + "step": 1108 + }, + { + "epoch": 0.10371133055646557, + "loss": 1.9633493423461914, + "loss_ce": 0.0033883987925946712, + "loss_iou": 0.86328125, + "loss_num": 0.046142578125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 73395804, + "step": 1108 + }, + { + "epoch": 0.10380493284036131, + "grad_norm": 28.32777976989746, + "learning_rate": 5e-05, + "loss": 1.8139, + "num_input_tokens_seen": 73463132, + "step": 1109 + }, + { + "epoch": 0.10380493284036131, + "loss": 1.838038444519043, + "loss_ce": 0.006007233634591103, + "loss_iou": 0.79296875, + "loss_num": 0.049560546875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 73463132, + "step": 1109 + }, + { + "epoch": 0.10389853512425704, + "grad_norm": 23.26406478881836, + "learning_rate": 5e-05, + "loss": 1.4926, + "num_input_tokens_seen": 73527964, + "step": 1110 + }, + { + "epoch": 0.10389853512425704, + "loss": 1.2852752208709717, + "loss_ce": 0.005856221076101065, + "loss_iou": 0.51953125, + "loss_num": 0.048828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 73527964, + "step": 1110 + }, + { + "epoch": 0.10399213740815276, + "grad_norm": 16.523807525634766, + "learning_rate": 5e-05, + "loss": 1.5728, + "num_input_tokens_seen": 73594392, + "step": 1111 + }, + { + "epoch": 0.10399213740815276, + "loss": 1.7789335250854492, + "loss_ce": 0.010378850623965263, + "loss_iou": 0.75390625, + "loss_num": 0.052490234375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 73594392, + "step": 1111 + }, + { + "epoch": 0.10408573969204848, + "grad_norm": 9.896635055541992, + "learning_rate": 5e-05, + "loss": 1.4108, + "num_input_tokens_seen": 73661308, + "step": 1112 + }, + { + "epoch": 0.10408573969204848, + "loss": 1.3604661226272583, + "loss_ce": 0.003044302109628916, + "loss_iou": 0.5859375, + "loss_num": 0.037109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 73661308, + "step": 1112 + }, + { + "epoch": 0.10417934197594421, + "grad_norm": 22.082138061523438, + "learning_rate": 5e-05, + "loss": 1.5699, + "num_input_tokens_seen": 73727672, + "step": 1113 + }, + { + "epoch": 0.10417934197594421, + "loss": 1.567997932434082, + "loss_ce": 0.007451091427356005, + "loss_iou": 0.66015625, + "loss_num": 0.048828125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 73727672, + "step": 1113 + }, + { + "epoch": 0.10427294425983993, + "grad_norm": 20.720991134643555, + "learning_rate": 5e-05, + "loss": 1.7929, + "num_input_tokens_seen": 73792744, + "step": 1114 + }, + { + "epoch": 0.10427294425983993, + "loss": 2.0892069339752197, + "loss_ce": 0.007175697945058346, + "loss_iou": 0.88671875, + "loss_num": 0.062255859375, + "loss_xval": 2.078125, + "num_input_tokens_seen": 73792744, + "step": 1114 + }, + { + "epoch": 0.10436654654373567, + "grad_norm": 51.49057388305664, + "learning_rate": 5e-05, + "loss": 1.4627, + "num_input_tokens_seen": 73858620, + "step": 1115 + }, + { + "epoch": 0.10436654654373567, + "loss": 1.385817289352417, + "loss_ce": 0.006422717124223709, + "loss_iou": 0.59375, + "loss_num": 0.039306640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 73858620, + "step": 1115 + }, + { + "epoch": 0.1044601488276314, + "grad_norm": 16.018857955932617, + "learning_rate": 5e-05, + "loss": 1.6839, + "num_input_tokens_seen": 73924180, + "step": 1116 + }, + { + "epoch": 0.1044601488276314, + "loss": 1.8350038528442383, + "loss_ce": 0.0049257902428507805, + "loss_iou": 0.7734375, + "loss_num": 0.0576171875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 73924180, + "step": 1116 + }, + { + "epoch": 0.10455375111152712, + "grad_norm": 12.003381729125977, + "learning_rate": 5e-05, + "loss": 1.6046, + "num_input_tokens_seen": 73990164, + "step": 1117 + }, + { + "epoch": 0.10455375111152712, + "loss": 1.821317434310913, + "loss_ce": 0.006864245980978012, + "loss_iou": 0.76953125, + "loss_num": 0.055419921875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 73990164, + "step": 1117 + }, + { + "epoch": 0.10464735339542285, + "grad_norm": 17.458560943603516, + "learning_rate": 5e-05, + "loss": 1.5408, + "num_input_tokens_seen": 74056232, + "step": 1118 + }, + { + "epoch": 0.10464735339542285, + "loss": 1.5779576301574707, + "loss_ce": 0.005691987462341785, + "loss_iou": 0.63671875, + "loss_num": 0.06005859375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 74056232, + "step": 1118 + }, + { + "epoch": 0.10474095567931857, + "grad_norm": 24.969619750976562, + "learning_rate": 5e-05, + "loss": 1.8344, + "num_input_tokens_seen": 74123288, + "step": 1119 + }, + { + "epoch": 0.10474095567931857, + "loss": 1.7968626022338867, + "loss_ce": 0.002917362842708826, + "loss_iou": 0.76953125, + "loss_num": 0.050537109375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 74123288, + "step": 1119 + }, + { + "epoch": 0.10483455796321431, + "grad_norm": 20.472679138183594, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 74189428, + "step": 1120 + }, + { + "epoch": 0.10483455796321431, + "loss": 1.5565311908721924, + "loss_ce": 0.006238183006644249, + "loss_iou": 0.6640625, + "loss_num": 0.044189453125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 74189428, + "step": 1120 + }, + { + "epoch": 0.10492816024711003, + "grad_norm": 54.381935119628906, + "learning_rate": 5e-05, + "loss": 1.5578, + "num_input_tokens_seen": 74256172, + "step": 1121 + }, + { + "epoch": 0.10492816024711003, + "loss": 1.524882435798645, + "loss_ce": 0.004374627023935318, + "loss_iou": 0.66796875, + "loss_num": 0.036865234375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 74256172, + "step": 1121 + }, + { + "epoch": 0.10502176253100576, + "grad_norm": 15.588903427124023, + "learning_rate": 5e-05, + "loss": 1.4205, + "num_input_tokens_seen": 74322556, + "step": 1122 + }, + { + "epoch": 0.10502176253100576, + "loss": 1.4770457744598389, + "loss_ce": 0.004145374521613121, + "loss_iou": 0.66015625, + "loss_num": 0.0299072265625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 74322556, + "step": 1122 + }, + { + "epoch": 0.10511536481490148, + "grad_norm": 18.953041076660156, + "learning_rate": 5e-05, + "loss": 1.427, + "num_input_tokens_seen": 74389180, + "step": 1123 + }, + { + "epoch": 0.10511536481490148, + "loss": 1.4715644121170044, + "loss_ce": 0.0028144335374236107, + "loss_iou": 0.609375, + "loss_num": 0.051025390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 74389180, + "step": 1123 + }, + { + "epoch": 0.10520896709879721, + "grad_norm": 21.091703414916992, + "learning_rate": 5e-05, + "loss": 1.4477, + "num_input_tokens_seen": 74455756, + "step": 1124 + }, + { + "epoch": 0.10520896709879721, + "loss": 1.4070497751235962, + "loss_ce": 0.0037294612266123295, + "loss_iou": 0.65625, + "loss_num": 0.0184326171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 74455756, + "step": 1124 + }, + { + "epoch": 0.10530256938269293, + "grad_norm": 36.85136795043945, + "learning_rate": 5e-05, + "loss": 1.7221, + "num_input_tokens_seen": 74521528, + "step": 1125 + }, + { + "epoch": 0.10530256938269293, + "loss": 1.437005877494812, + "loss_ce": 0.003045933321118355, + "loss_iou": 0.61328125, + "loss_num": 0.0419921875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 74521528, + "step": 1125 + }, + { + "epoch": 0.10539617166658867, + "grad_norm": 14.059605598449707, + "learning_rate": 5e-05, + "loss": 1.9773, + "num_input_tokens_seen": 74587804, + "step": 1126 + }, + { + "epoch": 0.10539617166658867, + "loss": 2.071012020111084, + "loss_ce": 0.008512133732438087, + "loss_iou": 0.84765625, + "loss_num": 0.0732421875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 74587804, + "step": 1126 + }, + { + "epoch": 0.1054897739504844, + "grad_norm": 19.6878604888916, + "learning_rate": 5e-05, + "loss": 1.4327, + "num_input_tokens_seen": 74654252, + "step": 1127 + }, + { + "epoch": 0.1054897739504844, + "loss": 1.3145666122436523, + "loss_ce": 0.0015783183043822646, + "loss_iou": 0.6015625, + "loss_num": 0.021484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 74654252, + "step": 1127 + }, + { + "epoch": 0.10558337623438012, + "grad_norm": 18.852392196655273, + "learning_rate": 5e-05, + "loss": 1.5887, + "num_input_tokens_seen": 74720920, + "step": 1128 + }, + { + "epoch": 0.10558337623438012, + "loss": 1.7087366580963135, + "loss_ce": 0.003658563131466508, + "loss_iou": 0.6953125, + "loss_num": 0.06396484375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 74720920, + "step": 1128 + }, + { + "epoch": 0.10567697851827584, + "grad_norm": 15.907039642333984, + "learning_rate": 5e-05, + "loss": 1.6453, + "num_input_tokens_seen": 74787364, + "step": 1129 + }, + { + "epoch": 0.10567697851827584, + "loss": 1.430016279220581, + "loss_ce": 0.005211664829403162, + "loss_iou": 0.6328125, + "loss_num": 0.03173828125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 74787364, + "step": 1129 + }, + { + "epoch": 0.10577058080217157, + "grad_norm": 13.575602531433105, + "learning_rate": 5e-05, + "loss": 1.3277, + "num_input_tokens_seen": 74852120, + "step": 1130 + }, + { + "epoch": 0.10577058080217157, + "loss": 1.2877238988876343, + "loss_ce": 0.0025675965007394552, + "loss_iou": 0.51171875, + "loss_num": 0.052490234375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 74852120, + "step": 1130 + }, + { + "epoch": 0.1058641830860673, + "grad_norm": 18.148401260375977, + "learning_rate": 5e-05, + "loss": 1.4902, + "num_input_tokens_seen": 74917696, + "step": 1131 + }, + { + "epoch": 0.1058641830860673, + "loss": 1.5735406875610352, + "loss_ce": 0.0032282189931720495, + "loss_iou": 0.671875, + "loss_num": 0.044921875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 74917696, + "step": 1131 + }, + { + "epoch": 0.10595778536996303, + "grad_norm": 17.016647338867188, + "learning_rate": 5e-05, + "loss": 1.5977, + "num_input_tokens_seen": 74983904, + "step": 1132 + }, + { + "epoch": 0.10595778536996303, + "loss": 1.4564955234527588, + "loss_ce": 0.00922992080450058, + "loss_iou": 0.62109375, + "loss_num": 0.041259765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 74983904, + "step": 1132 + }, + { + "epoch": 0.10605138765385876, + "grad_norm": 30.827829360961914, + "learning_rate": 5e-05, + "loss": 1.5576, + "num_input_tokens_seen": 75050432, + "step": 1133 + }, + { + "epoch": 0.10605138765385876, + "loss": 1.4756646156311035, + "loss_ce": 0.0027642915956676006, + "loss_iou": 0.640625, + "loss_num": 0.038818359375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 75050432, + "step": 1133 + }, + { + "epoch": 0.10614498993775448, + "grad_norm": 14.184310913085938, + "learning_rate": 5e-05, + "loss": 1.6376, + "num_input_tokens_seen": 75116180, + "step": 1134 + }, + { + "epoch": 0.10614498993775448, + "loss": 1.1849550008773804, + "loss_ce": 0.004596044309437275, + "loss_iou": 0.498046875, + "loss_num": 0.03662109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 75116180, + "step": 1134 + }, + { + "epoch": 0.1062385922216502, + "grad_norm": 12.556612968444824, + "learning_rate": 5e-05, + "loss": 1.472, + "num_input_tokens_seen": 75182036, + "step": 1135 + }, + { + "epoch": 0.1062385922216502, + "loss": 1.4659476280212402, + "loss_ce": 0.0050101205706596375, + "loss_iou": 0.62890625, + "loss_num": 0.0400390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 75182036, + "step": 1135 + }, + { + "epoch": 0.10633219450554593, + "grad_norm": 14.626906394958496, + "learning_rate": 5e-05, + "loss": 1.3054, + "num_input_tokens_seen": 75249140, + "step": 1136 + }, + { + "epoch": 0.10633219450554593, + "loss": 1.2172424793243408, + "loss_ce": 0.003375312313437462, + "loss_iou": 0.5390625, + "loss_num": 0.02734375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 75249140, + "step": 1136 + }, + { + "epoch": 0.10642579678944167, + "grad_norm": 19.031736373901367, + "learning_rate": 5e-05, + "loss": 1.4979, + "num_input_tokens_seen": 75316344, + "step": 1137 + }, + { + "epoch": 0.10642579678944167, + "loss": 1.5610942840576172, + "loss_ce": 0.008359957486391068, + "loss_iou": 0.69921875, + "loss_num": 0.03076171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 75316344, + "step": 1137 + }, + { + "epoch": 0.10651939907333739, + "grad_norm": 15.841978073120117, + "learning_rate": 5e-05, + "loss": 1.6722, + "num_input_tokens_seen": 75382524, + "step": 1138 + }, + { + "epoch": 0.10651939907333739, + "loss": 1.7715243101119995, + "loss_ce": 0.00492275133728981, + "loss_iou": 0.7734375, + "loss_num": 0.043701171875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 75382524, + "step": 1138 + }, + { + "epoch": 0.10661300135723312, + "grad_norm": 24.407976150512695, + "learning_rate": 5e-05, + "loss": 1.5927, + "num_input_tokens_seen": 75449316, + "step": 1139 + }, + { + "epoch": 0.10661300135723312, + "loss": 1.6843119859695435, + "loss_ce": 0.003647799603641033, + "loss_iou": 0.7109375, + "loss_num": 0.0517578125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 75449316, + "step": 1139 + }, + { + "epoch": 0.10670660364112884, + "grad_norm": 17.990219116210938, + "learning_rate": 5e-05, + "loss": 1.6142, + "num_input_tokens_seen": 75515588, + "step": 1140 + }, + { + "epoch": 0.10670660364112884, + "loss": 1.5659946203231812, + "loss_ce": 0.007400865666568279, + "loss_iou": 0.6796875, + "loss_num": 0.039794921875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 75515588, + "step": 1140 + }, + { + "epoch": 0.10680020592502457, + "grad_norm": 28.435983657836914, + "learning_rate": 5e-05, + "loss": 1.6111, + "num_input_tokens_seen": 75581540, + "step": 1141 + }, + { + "epoch": 0.10680020592502457, + "loss": 1.423324465751648, + "loss_ce": 0.008651572279632092, + "loss_iou": 0.640625, + "loss_num": 0.0269775390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 75581540, + "step": 1141 + }, + { + "epoch": 0.1068938082089203, + "grad_norm": 27.305858612060547, + "learning_rate": 5e-05, + "loss": 1.7486, + "num_input_tokens_seen": 75647720, + "step": 1142 + }, + { + "epoch": 0.1068938082089203, + "loss": 1.6713736057281494, + "loss_ce": 0.007311083376407623, + "loss_iou": 0.6796875, + "loss_num": 0.061767578125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 75647720, + "step": 1142 + }, + { + "epoch": 0.10698741049281603, + "grad_norm": 15.698596000671387, + "learning_rate": 5e-05, + "loss": 1.5414, + "num_input_tokens_seen": 75714040, + "step": 1143 + }, + { + "epoch": 0.10698741049281603, + "loss": 1.4805867671966553, + "loss_ce": 0.005000769160687923, + "loss_iou": 0.640625, + "loss_num": 0.039306640625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 75714040, + "step": 1143 + }, + { + "epoch": 0.10708101277671175, + "grad_norm": 31.52184295654297, + "learning_rate": 5e-05, + "loss": 1.3789, + "num_input_tokens_seen": 75780376, + "step": 1144 + }, + { + "epoch": 0.10708101277671175, + "loss": 1.4856374263763428, + "loss_ce": 0.004680377896875143, + "loss_iou": 0.625, + "loss_num": 0.046142578125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 75780376, + "step": 1144 + }, + { + "epoch": 0.10717461506060748, + "grad_norm": 17.525299072265625, + "learning_rate": 5e-05, + "loss": 1.7459, + "num_input_tokens_seen": 75846568, + "step": 1145 + }, + { + "epoch": 0.10717461506060748, + "loss": 1.8164008855819702, + "loss_ce": 0.00780714675784111, + "loss_iou": 0.796875, + "loss_num": 0.042724609375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 75846568, + "step": 1145 + }, + { + "epoch": 0.1072682173445032, + "grad_norm": 19.073184967041016, + "learning_rate": 5e-05, + "loss": 1.4566, + "num_input_tokens_seen": 75911820, + "step": 1146 + }, + { + "epoch": 0.1072682173445032, + "loss": 1.1922235488891602, + "loss_ce": 0.006188367493450642, + "loss_iou": 0.50390625, + "loss_num": 0.035888671875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 75911820, + "step": 1146 + }, + { + "epoch": 0.10736181962839893, + "grad_norm": 13.780308723449707, + "learning_rate": 5e-05, + "loss": 1.2774, + "num_input_tokens_seen": 75977592, + "step": 1147 + }, + { + "epoch": 0.10736181962839893, + "loss": 1.286110281944275, + "loss_ce": 0.006325116381049156, + "loss_iou": 0.5390625, + "loss_num": 0.04052734375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 75977592, + "step": 1147 + }, + { + "epoch": 0.10745542191229467, + "grad_norm": 25.382017135620117, + "learning_rate": 5e-05, + "loss": 1.6546, + "num_input_tokens_seen": 76043848, + "step": 1148 + }, + { + "epoch": 0.10745542191229467, + "loss": 1.7315950393676758, + "loss_ce": 0.006497358903288841, + "loss_iou": 0.75, + "loss_num": 0.04443359375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 76043848, + "step": 1148 + }, + { + "epoch": 0.10754902419619039, + "grad_norm": 15.511128425598145, + "learning_rate": 5e-05, + "loss": 1.7734, + "num_input_tokens_seen": 76110168, + "step": 1149 + }, + { + "epoch": 0.10754902419619039, + "loss": 1.8539836406707764, + "loss_ce": 0.003885905258357525, + "loss_iou": 0.73828125, + "loss_num": 0.07470703125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 76110168, + "step": 1149 + }, + { + "epoch": 0.10764262648008611, + "grad_norm": 18.047719955444336, + "learning_rate": 5e-05, + "loss": 1.3925, + "num_input_tokens_seen": 76176880, + "step": 1150 + }, + { + "epoch": 0.10764262648008611, + "loss": 1.360733985900879, + "loss_ce": 0.0033121337182819843, + "loss_iou": 0.609375, + "loss_num": 0.02783203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 76176880, + "step": 1150 + }, + { + "epoch": 0.10773622876398184, + "grad_norm": 17.841157913208008, + "learning_rate": 5e-05, + "loss": 1.4778, + "num_input_tokens_seen": 76243416, + "step": 1151 + }, + { + "epoch": 0.10773622876398184, + "loss": 1.3315365314483643, + "loss_ce": 0.005181404761970043, + "loss_iou": 0.5546875, + "loss_num": 0.043212890625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 76243416, + "step": 1151 + }, + { + "epoch": 0.10782983104787756, + "grad_norm": 17.448850631713867, + "learning_rate": 5e-05, + "loss": 1.452, + "num_input_tokens_seen": 76308860, + "step": 1152 + }, + { + "epoch": 0.10782983104787756, + "loss": 1.2795655727386475, + "loss_ce": 0.003442568937316537, + "loss_iou": 0.54296875, + "loss_num": 0.03857421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 76308860, + "step": 1152 + }, + { + "epoch": 0.10792343333177329, + "grad_norm": 32.93611145019531, + "learning_rate": 5e-05, + "loss": 1.5278, + "num_input_tokens_seen": 76375076, + "step": 1153 + }, + { + "epoch": 0.10792343333177329, + "loss": 1.5748491287231445, + "loss_ce": 0.0025834650732576847, + "loss_iou": 0.6953125, + "loss_num": 0.0361328125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 76375076, + "step": 1153 + }, + { + "epoch": 0.10801703561566903, + "grad_norm": 18.724693298339844, + "learning_rate": 5e-05, + "loss": 1.6567, + "num_input_tokens_seen": 76441148, + "step": 1154 + }, + { + "epoch": 0.10801703561566903, + "loss": 1.6741814613342285, + "loss_ce": 0.0023064701817929745, + "loss_iou": 0.75, + "loss_num": 0.034912109375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 76441148, + "step": 1154 + }, + { + "epoch": 0.10811063789956475, + "grad_norm": 16.60007667541504, + "learning_rate": 5e-05, + "loss": 1.4983, + "num_input_tokens_seen": 76508524, + "step": 1155 + }, + { + "epoch": 0.10811063789956475, + "loss": 1.4101048707962036, + "loss_ce": 0.004343102686107159, + "loss_iou": 0.6171875, + "loss_num": 0.034912109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 76508524, + "step": 1155 + }, + { + "epoch": 0.10820424018346048, + "grad_norm": 35.742671966552734, + "learning_rate": 5e-05, + "loss": 1.4645, + "num_input_tokens_seen": 76574908, + "step": 1156 + }, + { + "epoch": 0.10820424018346048, + "loss": 1.3999061584472656, + "loss_ce": 0.0014685725327581167, + "loss_iou": 0.609375, + "loss_num": 0.03515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 76574908, + "step": 1156 + }, + { + "epoch": 0.1082978424673562, + "grad_norm": 14.900714874267578, + "learning_rate": 5e-05, + "loss": 1.9227, + "num_input_tokens_seen": 76641568, + "step": 1157 + }, + { + "epoch": 0.1082978424673562, + "loss": 2.0242905616760254, + "loss_ce": 0.004759383387863636, + "loss_iou": 0.8984375, + "loss_num": 0.04541015625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 76641568, + "step": 1157 + }, + { + "epoch": 0.10839144475125193, + "grad_norm": 19.15547752380371, + "learning_rate": 5e-05, + "loss": 1.6838, + "num_input_tokens_seen": 76708668, + "step": 1158 + }, + { + "epoch": 0.10839144475125193, + "loss": 1.7164533138275146, + "loss_ce": 0.0035627796314656734, + "loss_iou": 0.75390625, + "loss_num": 0.04150390625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 76708668, + "step": 1158 + }, + { + "epoch": 0.10848504703514766, + "grad_norm": 23.434572219848633, + "learning_rate": 5e-05, + "loss": 1.4897, + "num_input_tokens_seen": 76774852, + "step": 1159 + }, + { + "epoch": 0.10848504703514766, + "loss": 1.4935109615325928, + "loss_ce": 0.0071828728541731834, + "loss_iou": 0.609375, + "loss_num": 0.05419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 76774852, + "step": 1159 + }, + { + "epoch": 0.10857864931904339, + "grad_norm": 17.213069915771484, + "learning_rate": 5e-05, + "loss": 1.5325, + "num_input_tokens_seen": 76840692, + "step": 1160 + }, + { + "epoch": 0.10857864931904339, + "loss": 1.3890526294708252, + "loss_ce": 0.004775314591825008, + "loss_iou": 0.609375, + "loss_num": 0.033447265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 76840692, + "step": 1160 + }, + { + "epoch": 0.10867225160293911, + "grad_norm": 10.26416301727295, + "learning_rate": 5e-05, + "loss": 1.4267, + "num_input_tokens_seen": 76907736, + "step": 1161 + }, + { + "epoch": 0.10867225160293911, + "loss": 1.375671625137329, + "loss_ce": 0.005554396193474531, + "loss_iou": 0.6015625, + "loss_num": 0.0341796875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 76907736, + "step": 1161 + }, + { + "epoch": 0.10876585388683484, + "grad_norm": 86.20824432373047, + "learning_rate": 5e-05, + "loss": 1.7641, + "num_input_tokens_seen": 76973836, + "step": 1162 + }, + { + "epoch": 0.10876585388683484, + "loss": 1.735271692276001, + "loss_ce": 0.004802928771823645, + "loss_iou": 0.671875, + "loss_num": 0.078125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 76973836, + "step": 1162 + }, + { + "epoch": 0.10885945617073056, + "grad_norm": 19.393543243408203, + "learning_rate": 5e-05, + "loss": 1.9077, + "num_input_tokens_seen": 77040316, + "step": 1163 + }, + { + "epoch": 0.10885945617073056, + "loss": 1.9816315174102783, + "loss_ce": 0.005069003440439701, + "loss_iou": 0.84375, + "loss_num": 0.056884765625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 77040316, + "step": 1163 + }, + { + "epoch": 0.10895305845462629, + "grad_norm": 25.78728675842285, + "learning_rate": 5e-05, + "loss": 1.6901, + "num_input_tokens_seen": 77105984, + "step": 1164 + }, + { + "epoch": 0.10895305845462629, + "loss": 1.5702900886535645, + "loss_ce": 0.0038838728796690702, + "loss_iou": 0.70703125, + "loss_num": 0.0301513671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 77105984, + "step": 1164 + }, + { + "epoch": 0.10904666073852202, + "grad_norm": 26.9300537109375, + "learning_rate": 5e-05, + "loss": 1.5518, + "num_input_tokens_seen": 77172180, + "step": 1165 + }, + { + "epoch": 0.10904666073852202, + "loss": 1.6828746795654297, + "loss_ce": 0.008069908246397972, + "loss_iou": 0.72265625, + "loss_num": 0.04638671875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 77172180, + "step": 1165 + }, + { + "epoch": 0.10914026302241775, + "grad_norm": 19.31071662902832, + "learning_rate": 5e-05, + "loss": 1.7768, + "num_input_tokens_seen": 77238456, + "step": 1166 + }, + { + "epoch": 0.10914026302241775, + "loss": 1.920846939086914, + "loss_ce": 0.005807799287140369, + "loss_iou": 0.82421875, + "loss_num": 0.05322265625, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 77238456, + "step": 1166 + }, + { + "epoch": 0.10923386530631347, + "grad_norm": 11.709778785705566, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 77305156, + "step": 1167 + }, + { + "epoch": 0.10923386530631347, + "loss": 1.3690240383148193, + "loss_ce": 0.002813051687553525, + "loss_iou": 0.60546875, + "loss_num": 0.031494140625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 77305156, + "step": 1167 + }, + { + "epoch": 0.1093274675902092, + "grad_norm": 16.924415588378906, + "learning_rate": 5e-05, + "loss": 1.5059, + "num_input_tokens_seen": 77369532, + "step": 1168 + }, + { + "epoch": 0.1093274675902092, + "loss": 1.5888309478759766, + "loss_ce": 0.006799612659960985, + "loss_iou": 0.609375, + "loss_num": 0.07275390625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 77369532, + "step": 1168 + }, + { + "epoch": 0.10942106987410492, + "grad_norm": 14.870468139648438, + "learning_rate": 5e-05, + "loss": 1.6683, + "num_input_tokens_seen": 77436100, + "step": 1169 + }, + { + "epoch": 0.10942106987410492, + "loss": 1.7231367826461792, + "loss_ce": 0.006339915096759796, + "loss_iou": 0.75, + "loss_num": 0.043701171875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 77436100, + "step": 1169 + }, + { + "epoch": 0.10951467215800066, + "grad_norm": 15.986180305480957, + "learning_rate": 5e-05, + "loss": 1.454, + "num_input_tokens_seen": 77500004, + "step": 1170 + }, + { + "epoch": 0.10951467215800066, + "loss": 1.194554090499878, + "loss_ce": 0.003269959706813097, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 77500004, + "step": 1170 + }, + { + "epoch": 0.10960827444189639, + "grad_norm": 19.57866668701172, + "learning_rate": 5e-05, + "loss": 1.429, + "num_input_tokens_seen": 77565940, + "step": 1171 + }, + { + "epoch": 0.10960827444189639, + "loss": 1.4926564693450928, + "loss_ce": 0.005840121768414974, + "loss_iou": 0.62890625, + "loss_num": 0.04541015625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 77565940, + "step": 1171 + }, + { + "epoch": 0.10970187672579211, + "grad_norm": 33.70952224731445, + "learning_rate": 5e-05, + "loss": 1.8232, + "num_input_tokens_seen": 77632920, + "step": 1172 + }, + { + "epoch": 0.10970187672579211, + "loss": 1.8883156776428223, + "loss_ce": 0.008432943373918533, + "loss_iou": 0.8125, + "loss_num": 0.05029296875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 77632920, + "step": 1172 + }, + { + "epoch": 0.10979547900968784, + "grad_norm": 15.238907814025879, + "learning_rate": 5e-05, + "loss": 1.6148, + "num_input_tokens_seen": 77699720, + "step": 1173 + }, + { + "epoch": 0.10979547900968784, + "loss": 1.4983510971069336, + "loss_ce": 0.006163555197417736, + "loss_iou": 0.6328125, + "loss_num": 0.045654296875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 77699720, + "step": 1173 + }, + { + "epoch": 0.10988908129358356, + "grad_norm": 15.282456398010254, + "learning_rate": 5e-05, + "loss": 1.6071, + "num_input_tokens_seen": 77765748, + "step": 1174 + }, + { + "epoch": 0.10988908129358356, + "loss": 1.3932451009750366, + "loss_ce": 0.004573243670165539, + "loss_iou": 0.6015625, + "loss_num": 0.03759765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 77765748, + "step": 1174 + }, + { + "epoch": 0.10998268357747928, + "grad_norm": 10.538078308105469, + "learning_rate": 5e-05, + "loss": 1.642, + "num_input_tokens_seen": 77832460, + "step": 1175 + }, + { + "epoch": 0.10998268357747928, + "loss": 1.758239984512329, + "loss_ce": 0.008239900693297386, + "loss_iou": 0.734375, + "loss_num": 0.055908203125, + "loss_xval": 1.75, + "num_input_tokens_seen": 77832460, + "step": 1175 + }, + { + "epoch": 0.11007628586137502, + "grad_norm": 13.04195785522461, + "learning_rate": 5e-05, + "loss": 1.4915, + "num_input_tokens_seen": 77898296, + "step": 1176 + }, + { + "epoch": 0.11007628586137502, + "loss": 1.4313489198684692, + "loss_ce": 0.005323539488017559, + "loss_iou": 0.58984375, + "loss_num": 0.04931640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 77898296, + "step": 1176 + }, + { + "epoch": 0.11016988814527075, + "grad_norm": 15.571574211120605, + "learning_rate": 5e-05, + "loss": 1.5161, + "num_input_tokens_seen": 77965436, + "step": 1177 + }, + { + "epoch": 0.11016988814527075, + "loss": 1.526175618171692, + "loss_ce": 0.009085720404982567, + "loss_iou": 0.62109375, + "loss_num": 0.055419921875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 77965436, + "step": 1177 + }, + { + "epoch": 0.11026349042916647, + "grad_norm": 20.632858276367188, + "learning_rate": 5e-05, + "loss": 1.5977, + "num_input_tokens_seen": 78031220, + "step": 1178 + }, + { + "epoch": 0.11026349042916647, + "loss": 1.8103036880493164, + "loss_ce": 0.0017099155811592937, + "loss_iou": 0.7734375, + "loss_num": 0.052734375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 78031220, + "step": 1178 + }, + { + "epoch": 0.1103570927130622, + "grad_norm": 48.338584899902344, + "learning_rate": 5e-05, + "loss": 1.7292, + "num_input_tokens_seen": 78097232, + "step": 1179 + }, + { + "epoch": 0.1103570927130622, + "loss": 1.6525057554244995, + "loss_ce": 0.009988706558942795, + "loss_iou": 0.64453125, + "loss_num": 0.0703125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 78097232, + "step": 1179 + }, + { + "epoch": 0.11045069499695792, + "grad_norm": 17.45369529724121, + "learning_rate": 5e-05, + "loss": 1.7281, + "num_input_tokens_seen": 78163752, + "step": 1180 + }, + { + "epoch": 0.11045069499695792, + "loss": 1.7762234210968018, + "loss_ce": 0.0018094154074788094, + "loss_iou": 0.77734375, + "loss_num": 0.04443359375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 78163752, + "step": 1180 + }, + { + "epoch": 0.11054429728085366, + "grad_norm": 16.85646629333496, + "learning_rate": 5e-05, + "loss": 1.5844, + "num_input_tokens_seen": 78229032, + "step": 1181 + }, + { + "epoch": 0.11054429728085366, + "loss": 1.802498698234558, + "loss_ce": 0.003182369517162442, + "loss_iou": 0.734375, + "loss_num": 0.06591796875, + "loss_xval": 1.796875, + "num_input_tokens_seen": 78229032, + "step": 1181 + }, + { + "epoch": 0.11063789956474938, + "grad_norm": 30.342195510864258, + "learning_rate": 5e-05, + "loss": 1.5089, + "num_input_tokens_seen": 78295148, + "step": 1182 + }, + { + "epoch": 0.11063789956474938, + "loss": 1.5325207710266113, + "loss_ce": 0.008106620982289314, + "loss_iou": 0.63671875, + "loss_num": 0.05029296875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 78295148, + "step": 1182 + }, + { + "epoch": 0.11073150184864511, + "grad_norm": 74.4539794921875, + "learning_rate": 5e-05, + "loss": 1.6102, + "num_input_tokens_seen": 78358716, + "step": 1183 + }, + { + "epoch": 0.11073150184864511, + "loss": 1.818831205368042, + "loss_ce": 0.002913268283009529, + "loss_iou": 0.796875, + "loss_num": 0.043212890625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 78358716, + "step": 1183 + }, + { + "epoch": 0.11082510413254083, + "grad_norm": 17.200271606445312, + "learning_rate": 5e-05, + "loss": 1.5589, + "num_input_tokens_seen": 78425420, + "step": 1184 + }, + { + "epoch": 0.11082510413254083, + "loss": 1.528570532798767, + "loss_ce": 0.004766830243170261, + "loss_iou": 0.671875, + "loss_num": 0.036865234375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 78425420, + "step": 1184 + }, + { + "epoch": 0.11091870641643656, + "grad_norm": 22.485254287719727, + "learning_rate": 5e-05, + "loss": 1.7477, + "num_input_tokens_seen": 78491684, + "step": 1185 + }, + { + "epoch": 0.11091870641643656, + "loss": 1.7003130912780762, + "loss_ce": 0.005000641569495201, + "loss_iou": 0.69921875, + "loss_num": 0.05908203125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 78491684, + "step": 1185 + }, + { + "epoch": 0.11101230870033228, + "grad_norm": 17.89415740966797, + "learning_rate": 5e-05, + "loss": 1.7692, + "num_input_tokens_seen": 78557320, + "step": 1186 + }, + { + "epoch": 0.11101230870033228, + "loss": 1.7814184427261353, + "loss_ce": 0.0021215705201029778, + "loss_iou": 0.765625, + "loss_num": 0.05029296875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 78557320, + "step": 1186 + }, + { + "epoch": 0.11110591098422802, + "grad_norm": 13.24902629852295, + "learning_rate": 5e-05, + "loss": 1.5665, + "num_input_tokens_seen": 78623740, + "step": 1187 + }, + { + "epoch": 0.11110591098422802, + "loss": 1.5412065982818604, + "loss_ce": 0.005073728505522013, + "loss_iou": 0.6484375, + "loss_num": 0.04833984375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 78623740, + "step": 1187 + }, + { + "epoch": 0.11119951326812375, + "grad_norm": 23.279170989990234, + "learning_rate": 5e-05, + "loss": 1.2237, + "num_input_tokens_seen": 78689792, + "step": 1188 + }, + { + "epoch": 0.11119951326812375, + "loss": 1.0964205265045166, + "loss_ce": 0.0032809292897582054, + "loss_iou": 0.4375, + "loss_num": 0.04345703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 78689792, + "step": 1188 + }, + { + "epoch": 0.11129311555201947, + "grad_norm": 14.37207317352295, + "learning_rate": 5e-05, + "loss": 1.5489, + "num_input_tokens_seen": 78755728, + "step": 1189 + }, + { + "epoch": 0.11129311555201947, + "loss": 1.81374990940094, + "loss_ce": 0.004179581068456173, + "loss_iou": 0.76171875, + "loss_num": 0.057373046875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 78755728, + "step": 1189 + }, + { + "epoch": 0.1113867178359152, + "grad_norm": 23.36548614501953, + "learning_rate": 5e-05, + "loss": 1.4067, + "num_input_tokens_seen": 78822156, + "step": 1190 + }, + { + "epoch": 0.1113867178359152, + "loss": 1.4242346286773682, + "loss_ce": 0.006265765056014061, + "loss_iou": 0.59765625, + "loss_num": 0.044677734375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 78822156, + "step": 1190 + }, + { + "epoch": 0.11148032011981092, + "grad_norm": 18.049659729003906, + "learning_rate": 5e-05, + "loss": 1.5596, + "num_input_tokens_seen": 78888476, + "step": 1191 + }, + { + "epoch": 0.11148032011981092, + "loss": 1.4020355939865112, + "loss_ce": 0.00359805254265666, + "loss_iou": 0.59375, + "loss_num": 0.042724609375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 78888476, + "step": 1191 + }, + { + "epoch": 0.11157392240370666, + "grad_norm": 20.797494888305664, + "learning_rate": 5e-05, + "loss": 1.5431, + "num_input_tokens_seen": 78954864, + "step": 1192 + }, + { + "epoch": 0.11157392240370666, + "loss": 1.367311954498291, + "loss_ce": 0.005495484918355942, + "loss_iou": 0.5078125, + "loss_num": 0.06884765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 78954864, + "step": 1192 + }, + { + "epoch": 0.11166752468760238, + "grad_norm": 24.160797119140625, + "learning_rate": 5e-05, + "loss": 1.578, + "num_input_tokens_seen": 79022160, + "step": 1193 + }, + { + "epoch": 0.11166752468760238, + "loss": 1.5062320232391357, + "loss_ce": 0.0013492072466760874, + "loss_iou": 0.66015625, + "loss_num": 0.036865234375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 79022160, + "step": 1193 + }, + { + "epoch": 0.1117611269714981, + "grad_norm": 121.45783996582031, + "learning_rate": 5e-05, + "loss": 1.6582, + "num_input_tokens_seen": 79088028, + "step": 1194 + }, + { + "epoch": 0.1117611269714981, + "loss": 1.662060022354126, + "loss_ce": 0.006786718033254147, + "loss_iou": 0.69140625, + "loss_num": 0.053955078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 79088028, + "step": 1194 + }, + { + "epoch": 0.11185472925539383, + "grad_norm": 26.80828094482422, + "learning_rate": 5e-05, + "loss": 1.5069, + "num_input_tokens_seen": 79154080, + "step": 1195 + }, + { + "epoch": 0.11185472925539383, + "loss": 1.549135446548462, + "loss_ce": 0.005190128460526466, + "loss_iou": 0.6875, + "loss_num": 0.0341796875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 79154080, + "step": 1195 + }, + { + "epoch": 0.11194833153928956, + "grad_norm": 44.29054641723633, + "learning_rate": 5e-05, + "loss": 1.6384, + "num_input_tokens_seen": 79222248, + "step": 1196 + }, + { + "epoch": 0.11194833153928956, + "loss": 1.426701307296753, + "loss_ce": 0.002873264020308852, + "loss_iou": 0.64453125, + "loss_num": 0.027099609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 79222248, + "step": 1196 + }, + { + "epoch": 0.11204193382318528, + "grad_norm": 17.937210083007812, + "learning_rate": 5e-05, + "loss": 1.4789, + "num_input_tokens_seen": 79287212, + "step": 1197 + }, + { + "epoch": 0.11204193382318528, + "loss": 1.263474702835083, + "loss_ce": 0.004197364207357168, + "loss_iou": 0.515625, + "loss_num": 0.044921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 79287212, + "step": 1197 + }, + { + "epoch": 0.11213553610708102, + "grad_norm": 26.6240291595459, + "learning_rate": 5e-05, + "loss": 1.4653, + "num_input_tokens_seen": 79353820, + "step": 1198 + }, + { + "epoch": 0.11213553610708102, + "loss": 1.3803198337554932, + "loss_ce": 0.009226055815815926, + "loss_iou": 0.56640625, + "loss_num": 0.0478515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 79353820, + "step": 1198 + }, + { + "epoch": 0.11222913839097674, + "grad_norm": 17.058929443359375, + "learning_rate": 5e-05, + "loss": 1.6894, + "num_input_tokens_seen": 79420152, + "step": 1199 + }, + { + "epoch": 0.11222913839097674, + "loss": 1.6565816402435303, + "loss_ce": 0.005214488599449396, + "loss_iou": 0.73828125, + "loss_num": 0.03515625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 79420152, + "step": 1199 + }, + { + "epoch": 0.11232274067487247, + "grad_norm": 17.481552124023438, + "learning_rate": 5e-05, + "loss": 1.3827, + "num_input_tokens_seen": 79487128, + "step": 1200 + }, + { + "epoch": 0.11232274067487247, + "loss": 1.5578417778015137, + "loss_ce": 0.0031542000360786915, + "loss_iou": 0.67578125, + "loss_num": 0.04052734375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 79487128, + "step": 1200 + }, + { + "epoch": 0.11241634295876819, + "grad_norm": 17.883695602416992, + "learning_rate": 5e-05, + "loss": 1.824, + "num_input_tokens_seen": 79553028, + "step": 1201 + }, + { + "epoch": 0.11241634295876819, + "loss": 1.742805004119873, + "loss_ce": 0.006476853042840958, + "loss_iou": 0.71875, + "loss_num": 0.0595703125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 79553028, + "step": 1201 + }, + { + "epoch": 0.11250994524266392, + "grad_norm": 31.487567901611328, + "learning_rate": 5e-05, + "loss": 1.6337, + "num_input_tokens_seen": 79618784, + "step": 1202 + }, + { + "epoch": 0.11250994524266392, + "loss": 1.580298662185669, + "loss_ce": 0.0070565007627010345, + "loss_iou": 0.67578125, + "loss_num": 0.044677734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 79618784, + "step": 1202 + }, + { + "epoch": 0.11260354752655964, + "grad_norm": 13.180530548095703, + "learning_rate": 5e-05, + "loss": 1.6058, + "num_input_tokens_seen": 79684240, + "step": 1203 + }, + { + "epoch": 0.11260354752655964, + "loss": 1.654146671295166, + "loss_ce": 0.005556548945605755, + "loss_iou": 0.69921875, + "loss_num": 0.05029296875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 79684240, + "step": 1203 + }, + { + "epoch": 0.11269714981045538, + "grad_norm": 18.37868881225586, + "learning_rate": 5e-05, + "loss": 1.5486, + "num_input_tokens_seen": 79749764, + "step": 1204 + }, + { + "epoch": 0.11269714981045538, + "loss": 1.5879004001617432, + "loss_ce": 0.005380784161388874, + "loss_iou": 0.69921875, + "loss_num": 0.037109375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 79749764, + "step": 1204 + }, + { + "epoch": 0.1127907520943511, + "grad_norm": 21.069881439208984, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 79815504, + "step": 1205 + }, + { + "epoch": 0.1127907520943511, + "loss": 1.0924522876739502, + "loss_ce": 0.0040734270587563515, + "loss_iou": 0.48046875, + "loss_num": 0.02587890625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 79815504, + "step": 1205 + }, + { + "epoch": 0.11288435437824683, + "grad_norm": 55.62563705444336, + "learning_rate": 5e-05, + "loss": 1.534, + "num_input_tokens_seen": 79880720, + "step": 1206 + }, + { + "epoch": 0.11288435437824683, + "loss": 1.7343441247940063, + "loss_ce": 0.001189854578115046, + "loss_iou": 0.76171875, + "loss_num": 0.04150390625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 79880720, + "step": 1206 + }, + { + "epoch": 0.11297795666214255, + "grad_norm": 13.673062324523926, + "learning_rate": 5e-05, + "loss": 1.2566, + "num_input_tokens_seen": 79947628, + "step": 1207 + }, + { + "epoch": 0.11297795666214255, + "loss": 1.2388091087341309, + "loss_ce": 0.012490655295550823, + "loss_iou": 0.5234375, + "loss_num": 0.036376953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 79947628, + "step": 1207 + }, + { + "epoch": 0.11307155894603828, + "grad_norm": 31.4323787689209, + "learning_rate": 5e-05, + "loss": 1.5795, + "num_input_tokens_seen": 80013796, + "step": 1208 + }, + { + "epoch": 0.11307155894603828, + "loss": 1.6102848052978516, + "loss_ce": 0.006769146770238876, + "loss_iou": 0.6953125, + "loss_num": 0.04296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 80013796, + "step": 1208 + }, + { + "epoch": 0.11316516122993402, + "grad_norm": 19.149446487426758, + "learning_rate": 5e-05, + "loss": 1.7813, + "num_input_tokens_seen": 80080252, + "step": 1209 + }, + { + "epoch": 0.11316516122993402, + "loss": 2.0932846069335938, + "loss_ce": 0.004417495336383581, + "loss_iou": 0.84765625, + "loss_num": 0.0791015625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 80080252, + "step": 1209 + }, + { + "epoch": 0.11325876351382974, + "grad_norm": 23.946191787719727, + "learning_rate": 5e-05, + "loss": 1.4635, + "num_input_tokens_seen": 80147240, + "step": 1210 + }, + { + "epoch": 0.11325876351382974, + "loss": 1.636222243309021, + "loss_ce": 0.010733958333730698, + "loss_iou": 0.6875, + "loss_num": 0.05029296875, + "loss_xval": 1.625, + "num_input_tokens_seen": 80147240, + "step": 1210 + }, + { + "epoch": 0.11335236579772547, + "grad_norm": 15.929207801818848, + "learning_rate": 5e-05, + "loss": 1.1694, + "num_input_tokens_seen": 80212560, + "step": 1211 + }, + { + "epoch": 0.11335236579772547, + "loss": 1.4668045043945312, + "loss_ce": 0.005378691479563713, + "loss_iou": 0.6015625, + "loss_num": 0.051025390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 80212560, + "step": 1211 + }, + { + "epoch": 0.11344596808162119, + "grad_norm": 16.284374237060547, + "learning_rate": 5e-05, + "loss": 1.6608, + "num_input_tokens_seen": 80277664, + "step": 1212 + }, + { + "epoch": 0.11344596808162119, + "loss": 1.7077348232269287, + "loss_ce": 0.0076615395955741405, + "loss_iou": 0.6953125, + "loss_num": 0.061279296875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 80277664, + "step": 1212 + }, + { + "epoch": 0.11353957036551691, + "grad_norm": 20.27515983581543, + "learning_rate": 5e-05, + "loss": 1.6985, + "num_input_tokens_seen": 80344340, + "step": 1213 + }, + { + "epoch": 0.11353957036551691, + "loss": 1.7379794120788574, + "loss_ce": 0.005557590164244175, + "loss_iou": 0.734375, + "loss_num": 0.0517578125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 80344340, + "step": 1213 + }, + { + "epoch": 0.11363317264941264, + "grad_norm": 28.41344451904297, + "learning_rate": 5e-05, + "loss": 1.1733, + "num_input_tokens_seen": 80410172, + "step": 1214 + }, + { + "epoch": 0.11363317264941264, + "loss": 1.2601943016052246, + "loss_ce": 0.005311502143740654, + "loss_iou": 0.55078125, + "loss_num": 0.03125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 80410172, + "step": 1214 + }, + { + "epoch": 0.11372677493330838, + "grad_norm": 24.274377822875977, + "learning_rate": 5e-05, + "loss": 1.6907, + "num_input_tokens_seen": 80475008, + "step": 1215 + }, + { + "epoch": 0.11372677493330838, + "loss": 1.854823112487793, + "loss_ce": 0.005213777534663677, + "loss_iou": 0.7578125, + "loss_num": 0.0673828125, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 80475008, + "step": 1215 + }, + { + "epoch": 0.1138203772172041, + "grad_norm": 11.962911605834961, + "learning_rate": 5e-05, + "loss": 1.7314, + "num_input_tokens_seen": 80540392, + "step": 1216 + }, + { + "epoch": 0.1138203772172041, + "loss": 1.848841667175293, + "loss_ce": 0.004115085117518902, + "loss_iou": 0.796875, + "loss_num": 0.049560546875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 80540392, + "step": 1216 + }, + { + "epoch": 0.11391397950109983, + "grad_norm": 12.080779075622559, + "learning_rate": 5e-05, + "loss": 1.3515, + "num_input_tokens_seen": 80607548, + "step": 1217 + }, + { + "epoch": 0.11391397950109983, + "loss": 1.11570143699646, + "loss_ce": 0.0020539246033877134, + "loss_iou": 0.46484375, + "loss_num": 0.036376953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 80607548, + "step": 1217 + }, + { + "epoch": 0.11400758178499555, + "grad_norm": 39.70487976074219, + "learning_rate": 5e-05, + "loss": 1.6139, + "num_input_tokens_seen": 80673836, + "step": 1218 + }, + { + "epoch": 0.11400758178499555, + "loss": 1.6444294452667236, + "loss_ce": 0.003804431762546301, + "loss_iou": 0.69140625, + "loss_num": 0.051025390625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 80673836, + "step": 1218 + }, + { + "epoch": 0.11410118406889128, + "grad_norm": 11.806645393371582, + "learning_rate": 5e-05, + "loss": 1.796, + "num_input_tokens_seen": 80739184, + "step": 1219 + }, + { + "epoch": 0.11410118406889128, + "loss": 1.8441953659057617, + "loss_ce": 0.007281385827809572, + "loss_iou": 0.8359375, + "loss_num": 0.033447265625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 80739184, + "step": 1219 + }, + { + "epoch": 0.11419478635278701, + "grad_norm": 42.97815704345703, + "learning_rate": 5e-05, + "loss": 1.6916, + "num_input_tokens_seen": 80804612, + "step": 1220 + }, + { + "epoch": 0.11419478635278701, + "loss": 1.8572298288345337, + "loss_ce": 0.001761129591614008, + "loss_iou": 0.80859375, + "loss_num": 0.04736328125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 80804612, + "step": 1220 + }, + { + "epoch": 0.11428838863668274, + "grad_norm": 32.125545501708984, + "learning_rate": 5e-05, + "loss": 1.4851, + "num_input_tokens_seen": 80871492, + "step": 1221 + }, + { + "epoch": 0.11428838863668274, + "loss": 1.7160561084747314, + "loss_ce": 0.005118582397699356, + "loss_iou": 0.75, + "loss_num": 0.04150390625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 80871492, + "step": 1221 + }, + { + "epoch": 0.11438199092057846, + "grad_norm": 23.3707218170166, + "learning_rate": 5e-05, + "loss": 1.2845, + "num_input_tokens_seen": 80936608, + "step": 1222 + }, + { + "epoch": 0.11438199092057846, + "loss": 1.3835852146148682, + "loss_ce": 0.006143780425190926, + "loss_iou": 0.5703125, + "loss_num": 0.047607421875, + "loss_xval": 1.375, + "num_input_tokens_seen": 80936608, + "step": 1222 + }, + { + "epoch": 0.11447559320447419, + "grad_norm": 17.40350914001465, + "learning_rate": 5e-05, + "loss": 1.4408, + "num_input_tokens_seen": 81003080, + "step": 1223 + }, + { + "epoch": 0.11447559320447419, + "loss": 1.3919970989227295, + "loss_ce": 0.003813605522736907, + "loss_iou": 0.6484375, + "loss_num": 0.01806640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 81003080, + "step": 1223 + }, + { + "epoch": 0.11456919548836991, + "grad_norm": 29.1763973236084, + "learning_rate": 5e-05, + "loss": 1.7472, + "num_input_tokens_seen": 81069080, + "step": 1224 + }, + { + "epoch": 0.11456919548836991, + "loss": 1.8503440618515015, + "loss_ce": 0.001711297663860023, + "loss_iou": 0.84375, + "loss_num": 0.0322265625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 81069080, + "step": 1224 + }, + { + "epoch": 0.11466279777226564, + "grad_norm": 15.609042167663574, + "learning_rate": 5e-05, + "loss": 1.5495, + "num_input_tokens_seen": 81136428, + "step": 1225 + }, + { + "epoch": 0.11466279777226564, + "loss": 1.3167450428009033, + "loss_ce": 0.0060760462656617165, + "loss_iou": 0.58984375, + "loss_num": 0.0257568359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 81136428, + "step": 1225 + }, + { + "epoch": 0.11475640005616138, + "grad_norm": 24.668973922729492, + "learning_rate": 5e-05, + "loss": 1.5587, + "num_input_tokens_seen": 81202296, + "step": 1226 + }, + { + "epoch": 0.11475640005616138, + "loss": 1.5846580266952515, + "loss_ce": 0.0016501974314451218, + "loss_iou": 0.68359375, + "loss_num": 0.04345703125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 81202296, + "step": 1226 + }, + { + "epoch": 0.1148500023400571, + "grad_norm": 78.94477081298828, + "learning_rate": 5e-05, + "loss": 1.8377, + "num_input_tokens_seen": 81269728, + "step": 1227 + }, + { + "epoch": 0.1148500023400571, + "loss": 1.6405736207962036, + "loss_ce": 0.003854891285300255, + "loss_iou": 0.75, + "loss_num": 0.02685546875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 81269728, + "step": 1227 + }, + { + "epoch": 0.11494360462395282, + "grad_norm": 13.085610389709473, + "learning_rate": 5e-05, + "loss": 1.6935, + "num_input_tokens_seen": 81336104, + "step": 1228 + }, + { + "epoch": 0.11494360462395282, + "loss": 1.722845196723938, + "loss_ce": 0.002264118054881692, + "loss_iou": 0.74609375, + "loss_num": 0.04638671875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 81336104, + "step": 1228 + }, + { + "epoch": 0.11503720690784855, + "grad_norm": 15.163686752319336, + "learning_rate": 5e-05, + "loss": 1.6346, + "num_input_tokens_seen": 81403708, + "step": 1229 + }, + { + "epoch": 0.11503720690784855, + "loss": 1.617916464805603, + "loss_ce": 0.006588327698409557, + "loss_iou": 0.7109375, + "loss_num": 0.038330078125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 81403708, + "step": 1229 + }, + { + "epoch": 0.11513080919174427, + "grad_norm": 9.518630981445312, + "learning_rate": 5e-05, + "loss": 1.2484, + "num_input_tokens_seen": 81469744, + "step": 1230 + }, + { + "epoch": 0.11513080919174427, + "loss": 1.3734846115112305, + "loss_ce": 0.004344025161117315, + "loss_iou": 0.61328125, + "loss_num": 0.0281982421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 81469744, + "step": 1230 + }, + { + "epoch": 0.11522441147564001, + "grad_norm": 17.46893882751465, + "learning_rate": 5e-05, + "loss": 1.7261, + "num_input_tokens_seen": 81535924, + "step": 1231 + }, + { + "epoch": 0.11522441147564001, + "loss": 1.5726174116134644, + "loss_ce": 0.007187769748270512, + "loss_iou": 0.671875, + "loss_num": 0.044677734375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 81535924, + "step": 1231 + }, + { + "epoch": 0.11531801375953574, + "grad_norm": 22.973176956176758, + "learning_rate": 5e-05, + "loss": 1.5868, + "num_input_tokens_seen": 81602740, + "step": 1232 + }, + { + "epoch": 0.11531801375953574, + "loss": 1.644413948059082, + "loss_ce": 0.0028124612290412188, + "loss_iou": 0.7265625, + "loss_num": 0.036865234375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 81602740, + "step": 1232 + }, + { + "epoch": 0.11541161604343146, + "grad_norm": 22.150278091430664, + "learning_rate": 5e-05, + "loss": 1.4655, + "num_input_tokens_seen": 81668772, + "step": 1233 + }, + { + "epoch": 0.11541161604343146, + "loss": 1.3677325248718262, + "loss_ce": 0.0022540693171322346, + "loss_iou": 0.59765625, + "loss_num": 0.033203125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 81668772, + "step": 1233 + }, + { + "epoch": 0.11550521832732719, + "grad_norm": 16.834016799926758, + "learning_rate": 5e-05, + "loss": 1.7075, + "num_input_tokens_seen": 81736168, + "step": 1234 + }, + { + "epoch": 0.11550521832732719, + "loss": 1.7671141624450684, + "loss_ce": 0.0014891426544636488, + "loss_iou": 0.7734375, + "loss_num": 0.044189453125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 81736168, + "step": 1234 + }, + { + "epoch": 0.11559882061122291, + "grad_norm": 11.076343536376953, + "learning_rate": 5e-05, + "loss": 1.2712, + "num_input_tokens_seen": 81802404, + "step": 1235 + }, + { + "epoch": 0.11559882061122291, + "loss": 1.2763545513153076, + "loss_ce": 0.003893628716468811, + "loss_iou": 0.5234375, + "loss_num": 0.045654296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 81802404, + "step": 1235 + }, + { + "epoch": 0.11569242289511864, + "grad_norm": 20.424575805664062, + "learning_rate": 5e-05, + "loss": 1.4339, + "num_input_tokens_seen": 81868092, + "step": 1236 + }, + { + "epoch": 0.11569242289511864, + "loss": 1.5448195934295654, + "loss_ce": 0.0028273831121623516, + "loss_iou": 0.67578125, + "loss_num": 0.038818359375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 81868092, + "step": 1236 + }, + { + "epoch": 0.11578602517901437, + "grad_norm": 51.29452896118164, + "learning_rate": 5e-05, + "loss": 1.5949, + "num_input_tokens_seen": 81934956, + "step": 1237 + }, + { + "epoch": 0.11578602517901437, + "loss": 1.6440658569335938, + "loss_ce": 0.0044175381772220135, + "loss_iou": 0.73828125, + "loss_num": 0.032470703125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 81934956, + "step": 1237 + }, + { + "epoch": 0.1158796274629101, + "grad_norm": 15.815311431884766, + "learning_rate": 5e-05, + "loss": 1.7589, + "num_input_tokens_seen": 82001952, + "step": 1238 + }, + { + "epoch": 0.1158796274629101, + "loss": 1.9351770877838135, + "loss_ce": 0.007442661561071873, + "loss_iou": 0.8046875, + "loss_num": 0.0634765625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 82001952, + "step": 1238 + }, + { + "epoch": 0.11597322974680582, + "grad_norm": 18.532611846923828, + "learning_rate": 5e-05, + "loss": 1.6198, + "num_input_tokens_seen": 82068772, + "step": 1239 + }, + { + "epoch": 0.11597322974680582, + "loss": 1.597364902496338, + "loss_ce": 0.0041031865403056145, + "loss_iou": 0.69140625, + "loss_num": 0.042236328125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 82068772, + "step": 1239 + }, + { + "epoch": 0.11606683203070155, + "grad_norm": 18.645465850830078, + "learning_rate": 5e-05, + "loss": 1.7358, + "num_input_tokens_seen": 82135532, + "step": 1240 + }, + { + "epoch": 0.11606683203070155, + "loss": 1.7387772798538208, + "loss_ce": 0.004402315244078636, + "loss_iou": 0.7109375, + "loss_num": 0.0634765625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 82135532, + "step": 1240 + }, + { + "epoch": 0.11616043431459727, + "grad_norm": 16.821317672729492, + "learning_rate": 5e-05, + "loss": 1.4882, + "num_input_tokens_seen": 82201672, + "step": 1241 + }, + { + "epoch": 0.11616043431459727, + "loss": 1.4463303089141846, + "loss_ce": 0.0039473664946854115, + "loss_iou": 0.65625, + "loss_num": 0.0252685546875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 82201672, + "step": 1241 + }, + { + "epoch": 0.116254036598493, + "grad_norm": 28.336610794067383, + "learning_rate": 5e-05, + "loss": 1.6999, + "num_input_tokens_seen": 82267820, + "step": 1242 + }, + { + "epoch": 0.116254036598493, + "loss": 1.676544189453125, + "loss_ce": 0.009552114643156528, + "loss_iou": 0.71875, + "loss_num": 0.046142578125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 82267820, + "step": 1242 + }, + { + "epoch": 0.11634763888238873, + "grad_norm": 32.36832046508789, + "learning_rate": 5e-05, + "loss": 1.5587, + "num_input_tokens_seen": 82334836, + "step": 1243 + }, + { + "epoch": 0.11634763888238873, + "loss": 1.7433605194091797, + "loss_ce": 0.0031261455733329058, + "loss_iou": 0.7421875, + "loss_num": 0.05126953125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 82334836, + "step": 1243 + }, + { + "epoch": 0.11644124116628446, + "grad_norm": 31.063182830810547, + "learning_rate": 5e-05, + "loss": 1.6658, + "num_input_tokens_seen": 82402016, + "step": 1244 + }, + { + "epoch": 0.11644124116628446, + "loss": 1.5226678848266602, + "loss_ce": 0.009972486644983292, + "loss_iou": 0.6640625, + "loss_num": 0.036865234375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 82402016, + "step": 1244 + }, + { + "epoch": 0.11653484345018018, + "grad_norm": 21.888381958007812, + "learning_rate": 5e-05, + "loss": 1.5815, + "num_input_tokens_seen": 82467508, + "step": 1245 + }, + { + "epoch": 0.11653484345018018, + "loss": 1.4058010578155518, + "loss_ce": 0.0029691134113818407, + "loss_iou": 0.55859375, + "loss_num": 0.056884765625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 82467508, + "step": 1245 + }, + { + "epoch": 0.11662844573407591, + "grad_norm": 12.787288665771484, + "learning_rate": 5e-05, + "loss": 1.1663, + "num_input_tokens_seen": 82534112, + "step": 1246 + }, + { + "epoch": 0.11662844573407591, + "loss": 1.4278309345245361, + "loss_ce": 0.0010730300564318895, + "loss_iou": 0.6328125, + "loss_num": 0.032958984375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 82534112, + "step": 1246 + }, + { + "epoch": 0.11672204801797163, + "grad_norm": 16.461729049682617, + "learning_rate": 5e-05, + "loss": 1.4065, + "num_input_tokens_seen": 82599828, + "step": 1247 + }, + { + "epoch": 0.11672204801797163, + "loss": 1.4895188808441162, + "loss_ce": 0.00367908226326108, + "loss_iou": 0.64453125, + "loss_num": 0.038818359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 82599828, + "step": 1247 + }, + { + "epoch": 0.11681565030186737, + "grad_norm": 22.454593658447266, + "learning_rate": 5e-05, + "loss": 1.6579, + "num_input_tokens_seen": 82666620, + "step": 1248 + }, + { + "epoch": 0.11681565030186737, + "loss": 1.6648619174957275, + "loss_ce": 0.0047057876363396645, + "loss_iou": 0.7265625, + "loss_num": 0.041259765625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 82666620, + "step": 1248 + }, + { + "epoch": 0.1169092525857631, + "grad_norm": 40.78807830810547, + "learning_rate": 5e-05, + "loss": 1.5815, + "num_input_tokens_seen": 82732352, + "step": 1249 + }, + { + "epoch": 0.1169092525857631, + "loss": 1.781855821609497, + "loss_ce": 0.002558845328167081, + "loss_iou": 0.78125, + "loss_num": 0.04443359375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 82732352, + "step": 1249 + }, + { + "epoch": 0.11700285486965882, + "grad_norm": 20.71820640563965, + "learning_rate": 5e-05, + "loss": 1.7601, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "eval_seeclick_CIoU": 0.1287650465965271, + "eval_seeclick_GIoU": 0.13856594264507294, + "eval_seeclick_IoU": 0.248815655708313, + "eval_seeclick_MAE_all": 0.16578662395477295, + "eval_seeclick_MAE_h": 0.08586683869361877, + "eval_seeclick_MAE_w": 0.12790357321500778, + "eval_seeclick_MAE_x_boxes": 0.27299533784389496, + "eval_seeclick_MAE_y_boxes": 0.11505845375359058, + "eval_seeclick_NUM_probability": 0.998512476682663, + "eval_seeclick_inside_bbox": 0.41875000298023224, + "eval_seeclick_loss": 2.5617454051971436, + "eval_seeclick_loss_ce": 0.013203508220613003, + "eval_seeclick_loss_iou": 0.876708984375, + "eval_seeclick_loss_num": 0.157806396484375, + "eval_seeclick_loss_xval": 2.541015625, + "eval_seeclick_runtime": 62.0666, + "eval_seeclick_samples_per_second": 0.757, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "eval_icons_CIoU": -0.07235723733901978, + "eval_icons_GIoU": -0.003447722876444459, + "eval_icons_IoU": 0.08474742993712425, + "eval_icons_MAE_all": 0.14920702576637268, + "eval_icons_MAE_h": 0.07863498479127884, + "eval_icons_MAE_w": 0.1913459524512291, + "eval_icons_MAE_x_boxes": 0.09553009271621704, + "eval_icons_MAE_y_boxes": 0.12014838308095932, + "eval_icons_NUM_probability": 0.997991144657135, + "eval_icons_inside_bbox": 0.2638888955116272, + "eval_icons_loss": 2.7587175369262695, + "eval_icons_loss_ce": 0.00024980072339531034, + "eval_icons_loss_iou": 1.017578125, + "eval_icons_loss_num": 0.1510772705078125, + "eval_icons_loss_xval": 2.79052734375, + "eval_icons_runtime": 64.9029, + "eval_icons_samples_per_second": 0.77, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "eval_screenspot_CIoU": 0.010961043337980906, + "eval_screenspot_GIoU": 0.01767559101184209, + "eval_screenspot_IoU": 0.1899357189734777, + "eval_screenspot_MAE_all": 0.1625519891579946, + "eval_screenspot_MAE_h": 0.10704489052295685, + "eval_screenspot_MAE_w": 0.15161699056625366, + "eval_screenspot_MAE_x_boxes": 0.19744082788626352, + "eval_screenspot_MAE_y_boxes": 0.1427758956948916, + "eval_screenspot_NUM_probability": 0.9987198114395142, + "eval_screenspot_inside_bbox": 0.40708333253860474, + "eval_screenspot_loss": 2.805513858795166, + "eval_screenspot_loss_ce": 0.014925556567807993, + "eval_screenspot_loss_iou": 0.99853515625, + "eval_screenspot_loss_num": 0.1709136962890625, + "eval_screenspot_loss_xval": 2.8528645833333335, + "eval_screenspot_runtime": 110.9727, + "eval_screenspot_samples_per_second": 0.802, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "eval_compot_CIoU": 0.034002652391791344, + "eval_compot_GIoU": 0.029855364933609962, + "eval_compot_IoU": 0.16077818721532822, + "eval_compot_MAE_all": 0.12482038512825966, + "eval_compot_MAE_h": 0.07313639391213655, + "eval_compot_MAE_w": 0.0934268981218338, + "eval_compot_MAE_x_boxes": 0.17276836931705475, + "eval_compot_MAE_y_boxes": 0.09329631552100182, + "eval_compot_NUM_probability": 0.9992510080337524, + "eval_compot_inside_bbox": 0.40625, + "eval_compot_loss": 2.634563684463501, + "eval_compot_loss_ce": 0.004644116037525237, + "eval_compot_loss_iou": 1.0078125, + "eval_compot_loss_num": 0.1324005126953125, + "eval_compot_loss_xval": 2.677734375, + "eval_compot_runtime": 77.0866, + "eval_compot_samples_per_second": 0.649, + "eval_compot_steps_per_second": 0.026, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "eval_custom_ui_MAE_all": 0.1432972252368927, + "eval_custom_ui_MAE_x": 0.1295909397304058, + "eval_custom_ui_MAE_y": 0.15700352936983109, + "eval_custom_ui_NUM_probability": 0.9995890855789185, + "eval_custom_ui_loss": 0.7637790441513062, + "eval_custom_ui_loss_ce": 0.07840772718191147, + "eval_custom_ui_loss_num": 0.138702392578125, + "eval_custom_ui_loss_xval": 0.6934814453125, + "eval_custom_ui_runtime": 59.9836, + "eval_custom_ui_samples_per_second": 0.834, + "eval_custom_ui_steps_per_second": 0.033, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11700285486965882, + "loss": 0.7642842531204224, + "loss_ce": 0.08703814446926117, + "loss_iou": 0.0, + "loss_num": 0.1357421875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 82798912, + "step": 1250 + }, + { + "epoch": 0.11709645715355455, + "grad_norm": 18.837018966674805, + "learning_rate": 5e-05, + "loss": 1.4492, + "num_input_tokens_seen": 82864888, + "step": 1251 + }, + { + "epoch": 0.11709645715355455, + "loss": 1.3731420040130615, + "loss_ce": 0.00400142464786768, + "loss_iou": 0.5390625, + "loss_num": 0.05810546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 82864888, + "step": 1251 + }, + { + "epoch": 0.11719005943745027, + "grad_norm": 31.525707244873047, + "learning_rate": 5e-05, + "loss": 1.4484, + "num_input_tokens_seen": 82931916, + "step": 1252 + }, + { + "epoch": 0.11719005943745027, + "loss": 1.5041272640228271, + "loss_ce": 0.009010091423988342, + "loss_iou": 0.66015625, + "loss_num": 0.034912109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 82931916, + "step": 1252 + }, + { + "epoch": 0.117283661721346, + "grad_norm": 18.124881744384766, + "learning_rate": 5e-05, + "loss": 1.7309, + "num_input_tokens_seen": 82999304, + "step": 1253 + }, + { + "epoch": 0.117283661721346, + "loss": 1.5895214080810547, + "loss_ce": 0.0035839076153934, + "loss_iou": 0.69921875, + "loss_num": 0.037109375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 82999304, + "step": 1253 + }, + { + "epoch": 0.11737726400524173, + "grad_norm": 16.497982025146484, + "learning_rate": 5e-05, + "loss": 1.6244, + "num_input_tokens_seen": 83065436, + "step": 1254 + }, + { + "epoch": 0.11737726400524173, + "loss": 1.752490758895874, + "loss_ce": 0.004688110668212175, + "loss_iou": 0.7109375, + "loss_num": 0.06494140625, + "loss_xval": 1.75, + "num_input_tokens_seen": 83065436, + "step": 1254 + }, + { + "epoch": 0.11747086628913746, + "grad_norm": 17.74229621887207, + "learning_rate": 5e-05, + "loss": 1.4879, + "num_input_tokens_seen": 83130840, + "step": 1255 + }, + { + "epoch": 0.11747086628913746, + "loss": 1.7128605842590332, + "loss_ce": 0.007294106297194958, + "loss_iou": 0.70703125, + "loss_num": 0.05859375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 83130840, + "step": 1255 + }, + { + "epoch": 0.11756446857303318, + "grad_norm": 16.521076202392578, + "learning_rate": 5e-05, + "loss": 1.4597, + "num_input_tokens_seen": 83197100, + "step": 1256 + }, + { + "epoch": 0.11756446857303318, + "loss": 1.4006917476654053, + "loss_ce": 0.005672247149050236, + "loss_iou": 0.5703125, + "loss_num": 0.05126953125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 83197100, + "step": 1256 + }, + { + "epoch": 0.1176580708569289, + "grad_norm": 33.442562103271484, + "learning_rate": 5e-05, + "loss": 1.5389, + "num_input_tokens_seen": 83263084, + "step": 1257 + }, + { + "epoch": 0.1176580708569289, + "loss": 1.6844170093536377, + "loss_ce": 0.0057061705738306046, + "loss_iou": 0.71875, + "loss_num": 0.04833984375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 83263084, + "step": 1257 + }, + { + "epoch": 0.11775167314082463, + "grad_norm": 24.860952377319336, + "learning_rate": 5e-05, + "loss": 1.4648, + "num_input_tokens_seen": 83329536, + "step": 1258 + }, + { + "epoch": 0.11775167314082463, + "loss": 1.4786264896392822, + "loss_ce": 0.006946785841137171, + "loss_iou": 0.65625, + "loss_num": 0.03271484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 83329536, + "step": 1258 + }, + { + "epoch": 0.11784527542472037, + "grad_norm": 17.826082229614258, + "learning_rate": 5e-05, + "loss": 1.5541, + "num_input_tokens_seen": 83395532, + "step": 1259 + }, + { + "epoch": 0.11784527542472037, + "loss": 1.3716524839401245, + "loss_ce": 0.0021456663962453604, + "loss_iou": 0.57421875, + "loss_num": 0.0439453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 83395532, + "step": 1259 + }, + { + "epoch": 0.1179388777086161, + "grad_norm": 21.652984619140625, + "learning_rate": 5e-05, + "loss": 1.5288, + "num_input_tokens_seen": 83461984, + "step": 1260 + }, + { + "epoch": 0.1179388777086161, + "loss": 1.3684253692626953, + "loss_ce": 0.004655827768146992, + "loss_iou": 0.5546875, + "loss_num": 0.05126953125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 83461984, + "step": 1260 + }, + { + "epoch": 0.11803247999251182, + "grad_norm": 98.35587310791016, + "learning_rate": 5e-05, + "loss": 1.6681, + "num_input_tokens_seen": 83529256, + "step": 1261 + }, + { + "epoch": 0.11803247999251182, + "loss": 1.677626132965088, + "loss_ce": 0.00672775087878108, + "loss_iou": 0.69921875, + "loss_num": 0.05517578125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 83529256, + "step": 1261 + }, + { + "epoch": 0.11812608227640754, + "grad_norm": 15.623915672302246, + "learning_rate": 5e-05, + "loss": 1.8378, + "num_input_tokens_seen": 83594752, + "step": 1262 + }, + { + "epoch": 0.11812608227640754, + "loss": 1.5487499237060547, + "loss_ce": 0.0035839397460222244, + "loss_iou": 0.6875, + "loss_num": 0.033935546875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 83594752, + "step": 1262 + }, + { + "epoch": 0.11821968456030327, + "grad_norm": 15.402535438537598, + "learning_rate": 5e-05, + "loss": 1.6479, + "num_input_tokens_seen": 83660632, + "step": 1263 + }, + { + "epoch": 0.11821968456030327, + "loss": 1.6366450786590576, + "loss_ce": 0.004900718107819557, + "loss_iou": 0.72265625, + "loss_num": 0.037109375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 83660632, + "step": 1263 + }, + { + "epoch": 0.11831328684419899, + "grad_norm": 11.214442253112793, + "learning_rate": 5e-05, + "loss": 1.2146, + "num_input_tokens_seen": 83725400, + "step": 1264 + }, + { + "epoch": 0.11831328684419899, + "loss": 1.0320955514907837, + "loss_ce": 0.00426351698115468, + "loss_iou": 0.404296875, + "loss_num": 0.0439453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 83725400, + "step": 1264 + }, + { + "epoch": 0.11840688912809473, + "grad_norm": 17.171525955200195, + "learning_rate": 5e-05, + "loss": 1.2422, + "num_input_tokens_seen": 83791140, + "step": 1265 + }, + { + "epoch": 0.11840688912809473, + "loss": 1.1908378601074219, + "loss_ce": 0.0043144794180989265, + "loss_iou": 0.4609375, + "loss_num": 0.052734375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 83791140, + "step": 1265 + }, + { + "epoch": 0.11850049141199046, + "grad_norm": 10.413750648498535, + "learning_rate": 5e-05, + "loss": 1.3303, + "num_input_tokens_seen": 83856912, + "step": 1266 + }, + { + "epoch": 0.11850049141199046, + "loss": 1.2113043069839478, + "loss_ce": 0.004883345682173967, + "loss_iou": 0.51953125, + "loss_num": 0.033447265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 83856912, + "step": 1266 + }, + { + "epoch": 0.11859409369588618, + "grad_norm": 20.207469940185547, + "learning_rate": 5e-05, + "loss": 1.4605, + "num_input_tokens_seen": 83923696, + "step": 1267 + }, + { + "epoch": 0.11859409369588618, + "loss": 1.6463834047317505, + "loss_ce": 0.0038052310701459646, + "loss_iou": 0.6953125, + "loss_num": 0.050048828125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 83923696, + "step": 1267 + }, + { + "epoch": 0.1186876959797819, + "grad_norm": 19.983022689819336, + "learning_rate": 5e-05, + "loss": 1.3416, + "num_input_tokens_seen": 83989660, + "step": 1268 + }, + { + "epoch": 0.1186876959797819, + "loss": 1.4917185306549072, + "loss_ce": 0.0073434836231172085, + "loss_iou": 0.62890625, + "loss_num": 0.044677734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 83989660, + "step": 1268 + }, + { + "epoch": 0.11878129826367763, + "grad_norm": 24.613861083984375, + "learning_rate": 5e-05, + "loss": 1.6368, + "num_input_tokens_seen": 84056984, + "step": 1269 + }, + { + "epoch": 0.11878129826367763, + "loss": 1.7018146514892578, + "loss_ce": 0.0016192832263186574, + "loss_iou": 0.7421875, + "loss_num": 0.04345703125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 84056984, + "step": 1269 + }, + { + "epoch": 0.11887490054757337, + "grad_norm": 18.163482666015625, + "learning_rate": 5e-05, + "loss": 1.6242, + "num_input_tokens_seen": 84122832, + "step": 1270 + }, + { + "epoch": 0.11887490054757337, + "loss": 1.6886608600616455, + "loss_ce": 0.0016491420101374388, + "loss_iou": 0.72265625, + "loss_num": 0.0478515625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 84122832, + "step": 1270 + }, + { + "epoch": 0.11896850283146909, + "grad_norm": 14.846522331237793, + "learning_rate": 5e-05, + "loss": 1.4988, + "num_input_tokens_seen": 84189052, + "step": 1271 + }, + { + "epoch": 0.11896850283146909, + "loss": 1.6928069591522217, + "loss_ce": 0.009213217534124851, + "loss_iou": 0.703125, + "loss_num": 0.0556640625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 84189052, + "step": 1271 + }, + { + "epoch": 0.11906210511536482, + "grad_norm": 15.97767448425293, + "learning_rate": 5e-05, + "loss": 1.2843, + "num_input_tokens_seen": 84254416, + "step": 1272 + }, + { + "epoch": 0.11906210511536482, + "loss": 1.2944265604019165, + "loss_ce": 0.004387491848319769, + "loss_iou": 0.58203125, + "loss_num": 0.0255126953125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 84254416, + "step": 1272 + }, + { + "epoch": 0.11915570739926054, + "grad_norm": 18.81927490234375, + "learning_rate": 5e-05, + "loss": 1.3602, + "num_input_tokens_seen": 84321512, + "step": 1273 + }, + { + "epoch": 0.11915570739926054, + "loss": 1.3958415985107422, + "loss_ce": 0.0032635130919516087, + "loss_iou": 0.6171875, + "loss_num": 0.0322265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 84321512, + "step": 1273 + }, + { + "epoch": 0.11924930968315627, + "grad_norm": 19.093976974487305, + "learning_rate": 5e-05, + "loss": 1.714, + "num_input_tokens_seen": 84386724, + "step": 1274 + }, + { + "epoch": 0.11924930968315627, + "loss": 1.7422585487365723, + "loss_ce": 0.00348901329562068, + "loss_iou": 0.70703125, + "loss_num": 0.06396484375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 84386724, + "step": 1274 + }, + { + "epoch": 0.11934291196705199, + "grad_norm": 14.970403671264648, + "learning_rate": 5e-05, + "loss": 1.5339, + "num_input_tokens_seen": 84454224, + "step": 1275 + }, + { + "epoch": 0.11934291196705199, + "loss": 1.63826584815979, + "loss_ce": 0.0069181350991129875, + "loss_iou": 0.65234375, + "loss_num": 0.06591796875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 84454224, + "step": 1275 + }, + { + "epoch": 0.11943651425094773, + "grad_norm": 19.01073455810547, + "learning_rate": 5e-05, + "loss": 1.421, + "num_input_tokens_seen": 84519860, + "step": 1276 + }, + { + "epoch": 0.11943651425094773, + "loss": 1.3654017448425293, + "loss_ce": 0.005050238221883774, + "loss_iou": 0.5625, + "loss_num": 0.047607421875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 84519860, + "step": 1276 + }, + { + "epoch": 0.11953011653484345, + "grad_norm": 16.294599533081055, + "learning_rate": 5e-05, + "loss": 1.5049, + "num_input_tokens_seen": 84585884, + "step": 1277 + }, + { + "epoch": 0.11953011653484345, + "loss": 1.465646505355835, + "loss_ce": 0.005197314545512199, + "loss_iou": 0.60546875, + "loss_num": 0.050048828125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 84585884, + "step": 1277 + }, + { + "epoch": 0.11962371881873918, + "grad_norm": 21.392253875732422, + "learning_rate": 5e-05, + "loss": 1.5868, + "num_input_tokens_seen": 84651872, + "step": 1278 + }, + { + "epoch": 0.11962371881873918, + "loss": 1.5563135147094727, + "loss_ce": 0.00260249525308609, + "loss_iou": 0.6640625, + "loss_num": 0.045166015625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 84651872, + "step": 1278 + }, + { + "epoch": 0.1197173211026349, + "grad_norm": 19.679719924926758, + "learning_rate": 5e-05, + "loss": 1.6347, + "num_input_tokens_seen": 84717280, + "step": 1279 + }, + { + "epoch": 0.1197173211026349, + "loss": 1.716318130493164, + "loss_ce": 0.0019625977147370577, + "loss_iou": 0.73828125, + "loss_num": 0.046875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 84717280, + "step": 1279 + }, + { + "epoch": 0.11981092338653063, + "grad_norm": 33.421905517578125, + "learning_rate": 5e-05, + "loss": 1.4694, + "num_input_tokens_seen": 84782716, + "step": 1280 + }, + { + "epoch": 0.11981092338653063, + "loss": 1.25675368309021, + "loss_ce": 0.003579774871468544, + "loss_iou": 0.54296875, + "loss_num": 0.03369140625, + "loss_xval": 1.25, + "num_input_tokens_seen": 84782716, + "step": 1280 + }, + { + "epoch": 0.11990452567042637, + "grad_norm": 27.822599411010742, + "learning_rate": 5e-05, + "loss": 1.8041, + "num_input_tokens_seen": 84848692, + "step": 1281 + }, + { + "epoch": 0.11990452567042637, + "loss": 1.7329094409942627, + "loss_ce": 0.0024407554883509874, + "loss_iou": 0.7734375, + "loss_num": 0.036376953125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 84848692, + "step": 1281 + }, + { + "epoch": 0.11999812795432209, + "grad_norm": 62.687049865722656, + "learning_rate": 5e-05, + "loss": 1.4717, + "num_input_tokens_seen": 84915340, + "step": 1282 + }, + { + "epoch": 0.11999812795432209, + "loss": 1.7012088298797607, + "loss_ce": 0.008826037868857384, + "loss_iou": 0.68359375, + "loss_num": 0.06494140625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 84915340, + "step": 1282 + }, + { + "epoch": 0.12009173023821781, + "grad_norm": 21.4544734954834, + "learning_rate": 5e-05, + "loss": 1.5386, + "num_input_tokens_seen": 84982344, + "step": 1283 + }, + { + "epoch": 0.12009173023821781, + "loss": 1.5494441986083984, + "loss_ce": 0.005498811602592468, + "loss_iou": 0.6953125, + "loss_num": 0.0311279296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 84982344, + "step": 1283 + }, + { + "epoch": 0.12018533252211354, + "grad_norm": 22.630477905273438, + "learning_rate": 5e-05, + "loss": 1.4824, + "num_input_tokens_seen": 85049760, + "step": 1284 + }, + { + "epoch": 0.12018533252211354, + "loss": 1.25767183303833, + "loss_ce": 0.0013242331333458424, + "loss_iou": 0.5625, + "loss_num": 0.0257568359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 85049760, + "step": 1284 + }, + { + "epoch": 0.12027893480600926, + "grad_norm": 46.782806396484375, + "learning_rate": 5e-05, + "loss": 1.568, + "num_input_tokens_seen": 85116292, + "step": 1285 + }, + { + "epoch": 0.12027893480600926, + "loss": 1.672314167022705, + "loss_ce": 0.0033688938710838556, + "loss_iou": 0.6953125, + "loss_num": 0.0556640625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 85116292, + "step": 1285 + }, + { + "epoch": 0.12037253708990499, + "grad_norm": 22.50234031677246, + "learning_rate": 5e-05, + "loss": 1.7053, + "num_input_tokens_seen": 85181932, + "step": 1286 + }, + { + "epoch": 0.12037253708990499, + "loss": 1.8718600273132324, + "loss_ce": 0.006625790614634752, + "loss_iou": 0.8359375, + "loss_num": 0.0390625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 85181932, + "step": 1286 + }, + { + "epoch": 0.12046613937380073, + "grad_norm": 58.333805084228516, + "learning_rate": 5e-05, + "loss": 1.512, + "num_input_tokens_seen": 85248036, + "step": 1287 + }, + { + "epoch": 0.12046613937380073, + "loss": 1.6283907890319824, + "loss_ce": 0.013156358152627945, + "loss_iou": 0.72265625, + "loss_num": 0.034423828125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 85248036, + "step": 1287 + }, + { + "epoch": 0.12055974165769645, + "grad_norm": 15.645672798156738, + "learning_rate": 5e-05, + "loss": 1.3016, + "num_input_tokens_seen": 85314132, + "step": 1288 + }, + { + "epoch": 0.12055974165769645, + "loss": 1.2546794414520264, + "loss_ce": 0.0066325003281235695, + "loss_iou": 0.5625, + "loss_num": 0.025390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 85314132, + "step": 1288 + }, + { + "epoch": 0.12065334394159218, + "grad_norm": 21.752676010131836, + "learning_rate": 5e-05, + "loss": 1.406, + "num_input_tokens_seen": 85379904, + "step": 1289 + }, + { + "epoch": 0.12065334394159218, + "loss": 1.273632526397705, + "loss_ce": 0.0037351157516241074, + "loss_iou": 0.5546875, + "loss_num": 0.031494140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 85379904, + "step": 1289 + }, + { + "epoch": 0.1207469462254879, + "grad_norm": 80.36859130859375, + "learning_rate": 5e-05, + "loss": 1.5521, + "num_input_tokens_seen": 85446272, + "step": 1290 + }, + { + "epoch": 0.1207469462254879, + "loss": 1.5876295566558838, + "loss_ce": 0.006574748083949089, + "loss_iou": 0.6796875, + "loss_num": 0.0439453125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 85446272, + "step": 1290 + }, + { + "epoch": 0.12084054850938362, + "grad_norm": 17.155616760253906, + "learning_rate": 5e-05, + "loss": 1.8221, + "num_input_tokens_seen": 85513108, + "step": 1291 + }, + { + "epoch": 0.12084054850938362, + "loss": 1.7568233013153076, + "loss_ce": 0.002917001722380519, + "loss_iou": 0.74609375, + "loss_num": 0.0517578125, + "loss_xval": 1.75, + "num_input_tokens_seen": 85513108, + "step": 1291 + }, + { + "epoch": 0.12093415079327935, + "grad_norm": 13.691561698913574, + "learning_rate": 5e-05, + "loss": 1.3615, + "num_input_tokens_seen": 85580328, + "step": 1292 + }, + { + "epoch": 0.12093415079327935, + "loss": 1.453429937362671, + "loss_ce": 0.0027464372105896473, + "loss_iou": 0.61328125, + "loss_num": 0.044921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 85580328, + "step": 1292 + }, + { + "epoch": 0.12102775307717509, + "grad_norm": 32.56606674194336, + "learning_rate": 5e-05, + "loss": 1.2465, + "num_input_tokens_seen": 85646888, + "step": 1293 + }, + { + "epoch": 0.12102775307717509, + "loss": 1.2406527996063232, + "loss_ce": 0.0023715831339359283, + "loss_iou": 0.5625, + "loss_num": 0.0234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 85646888, + "step": 1293 + }, + { + "epoch": 0.12112135536107081, + "grad_norm": 22.93648338317871, + "learning_rate": 5e-05, + "loss": 1.7601, + "num_input_tokens_seen": 85712936, + "step": 1294 + }, + { + "epoch": 0.12112135536107081, + "loss": 1.686873197555542, + "loss_ce": 0.00425603287294507, + "loss_iou": 0.734375, + "loss_num": 0.04248046875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 85712936, + "step": 1294 + }, + { + "epoch": 0.12121495764496654, + "grad_norm": 14.054872512817383, + "learning_rate": 5e-05, + "loss": 1.5023, + "num_input_tokens_seen": 85779100, + "step": 1295 + }, + { + "epoch": 0.12121495764496654, + "loss": 1.3477599620819092, + "loss_ce": 0.004986626096069813, + "loss_iou": 0.58203125, + "loss_num": 0.035888671875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 85779100, + "step": 1295 + }, + { + "epoch": 0.12130855992886226, + "grad_norm": 13.445719718933105, + "learning_rate": 5e-05, + "loss": 1.3279, + "num_input_tokens_seen": 85844692, + "step": 1296 + }, + { + "epoch": 0.12130855992886226, + "loss": 1.2723405361175537, + "loss_ce": 0.004518401343375444, + "loss_iou": 0.5390625, + "loss_num": 0.038818359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 85844692, + "step": 1296 + }, + { + "epoch": 0.12140216221275799, + "grad_norm": 12.350967407226562, + "learning_rate": 5e-05, + "loss": 1.0803, + "num_input_tokens_seen": 85910448, + "step": 1297 + }, + { + "epoch": 0.12140216221275799, + "loss": 1.0261882543563843, + "loss_ce": 0.003116979030892253, + "loss_iou": 0.458984375, + "loss_num": 0.0211181640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 85910448, + "step": 1297 + }, + { + "epoch": 0.12149576449665372, + "grad_norm": 20.45875358581543, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 85976348, + "step": 1298 + }, + { + "epoch": 0.12149576449665372, + "loss": 1.2711149454116821, + "loss_ce": 0.005978184752166271, + "loss_iou": 0.51953125, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 85976348, + "step": 1298 + }, + { + "epoch": 0.12158936678054945, + "grad_norm": 19.14122200012207, + "learning_rate": 5e-05, + "loss": 1.4476, + "num_input_tokens_seen": 86042828, + "step": 1299 + }, + { + "epoch": 0.12158936678054945, + "loss": 1.340684413909912, + "loss_ce": 0.006455985363572836, + "loss_iou": 0.59375, + "loss_num": 0.029052734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 86042828, + "step": 1299 + }, + { + "epoch": 0.12168296906444517, + "grad_norm": 25.907560348510742, + "learning_rate": 5e-05, + "loss": 1.5056, + "num_input_tokens_seen": 86109824, + "step": 1300 + }, + { + "epoch": 0.12168296906444517, + "loss": 1.396787166595459, + "loss_ce": 0.008054189383983612, + "loss_iou": 0.63671875, + "loss_num": 0.0234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 86109824, + "step": 1300 + }, + { + "epoch": 0.1217765713483409, + "grad_norm": 20.510488510131836, + "learning_rate": 5e-05, + "loss": 1.6723, + "num_input_tokens_seen": 86175660, + "step": 1301 + }, + { + "epoch": 0.1217765713483409, + "loss": 1.8065403699874878, + "loss_ce": 0.004782572388648987, + "loss_iou": 0.75390625, + "loss_num": 0.0595703125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 86175660, + "step": 1301 + }, + { + "epoch": 0.12187017363223662, + "grad_norm": 13.6410551071167, + "learning_rate": 5e-05, + "loss": 1.4309, + "num_input_tokens_seen": 86241376, + "step": 1302 + }, + { + "epoch": 0.12187017363223662, + "loss": 1.1717824935913086, + "loss_ce": 0.004057946149259806, + "loss_iou": 0.4375, + "loss_num": 0.058837890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 86241376, + "step": 1302 + }, + { + "epoch": 0.12196377591613235, + "grad_norm": 22.48175048828125, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 86308068, + "step": 1303 + }, + { + "epoch": 0.12196377591613235, + "loss": 1.351536750793457, + "loss_ce": 0.008274968713521957, + "loss_iou": 0.515625, + "loss_num": 0.06201171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 86308068, + "step": 1303 + }, + { + "epoch": 0.12205737820002809, + "grad_norm": 21.522815704345703, + "learning_rate": 5e-05, + "loss": 1.4996, + "num_input_tokens_seen": 86374600, + "step": 1304 + }, + { + "epoch": 0.12205737820002809, + "loss": 1.4055016040802002, + "loss_ce": 0.004134302027523518, + "loss_iou": 0.609375, + "loss_num": 0.035888671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 86374600, + "step": 1304 + }, + { + "epoch": 0.12215098048392381, + "grad_norm": 18.598857879638672, + "learning_rate": 5e-05, + "loss": 1.5089, + "num_input_tokens_seen": 86440616, + "step": 1305 + }, + { + "epoch": 0.12215098048392381, + "loss": 1.826608657836914, + "loss_ce": 0.007272652816027403, + "loss_iou": 0.76953125, + "loss_num": 0.0556640625, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 86440616, + "step": 1305 + }, + { + "epoch": 0.12224458276781953, + "grad_norm": 26.113828659057617, + "learning_rate": 5e-05, + "loss": 1.4685, + "num_input_tokens_seen": 86506688, + "step": 1306 + }, + { + "epoch": 0.12224458276781953, + "loss": 1.4255645275115967, + "loss_ce": 0.0017364441882818937, + "loss_iou": 0.6328125, + "loss_num": 0.0322265625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 86506688, + "step": 1306 + }, + { + "epoch": 0.12233818505171526, + "grad_norm": 36.879798889160156, + "learning_rate": 5e-05, + "loss": 1.5751, + "num_input_tokens_seen": 86571828, + "step": 1307 + }, + { + "epoch": 0.12233818505171526, + "loss": 1.6650397777557373, + "loss_ce": 0.006836687680333853, + "loss_iou": 0.703125, + "loss_num": 0.05078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 86571828, + "step": 1307 + }, + { + "epoch": 0.12243178733561098, + "grad_norm": 23.046838760375977, + "learning_rate": 5e-05, + "loss": 1.9209, + "num_input_tokens_seen": 86638472, + "step": 1308 + }, + { + "epoch": 0.12243178733561098, + "loss": 2.05283260345459, + "loss_ce": 0.0010746953776106238, + "loss_iou": 0.875, + "loss_num": 0.060791015625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 86638472, + "step": 1308 + }, + { + "epoch": 0.12252538961950672, + "grad_norm": 17.48950958251953, + "learning_rate": 5e-05, + "loss": 1.5904, + "num_input_tokens_seen": 86703256, + "step": 1309 + }, + { + "epoch": 0.12252538961950672, + "loss": 1.7254270315170288, + "loss_ce": 0.004723929800093174, + "loss_iou": 0.7421875, + "loss_num": 0.0478515625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 86703256, + "step": 1309 + }, + { + "epoch": 0.12261899190340245, + "grad_norm": 27.78485679626465, + "learning_rate": 5e-05, + "loss": 1.6184, + "num_input_tokens_seen": 86768240, + "step": 1310 + }, + { + "epoch": 0.12261899190340245, + "loss": 1.8453835248947144, + "loss_ce": 0.006516415625810623, + "loss_iou": 0.796875, + "loss_num": 0.048828125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 86768240, + "step": 1310 + }, + { + "epoch": 0.12271259418729817, + "grad_norm": 29.50354766845703, + "learning_rate": 5e-05, + "loss": 1.5398, + "num_input_tokens_seen": 86834552, + "step": 1311 + }, + { + "epoch": 0.12271259418729817, + "loss": 1.3262726068496704, + "loss_ce": 0.0039764754474163055, + "loss_iou": 0.58984375, + "loss_num": 0.0286865234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 86834552, + "step": 1311 + }, + { + "epoch": 0.1228061964711939, + "grad_norm": 11.42076587677002, + "learning_rate": 5e-05, + "loss": 1.5043, + "num_input_tokens_seen": 86900968, + "step": 1312 + }, + { + "epoch": 0.1228061964711939, + "loss": 1.376326322555542, + "loss_ce": 0.0013263248838484287, + "loss_iou": 0.5859375, + "loss_num": 0.041015625, + "loss_xval": 1.375, + "num_input_tokens_seen": 86900968, + "step": 1312 + }, + { + "epoch": 0.12289979875508962, + "grad_norm": 26.578754425048828, + "learning_rate": 5e-05, + "loss": 1.4276, + "num_input_tokens_seen": 86966816, + "step": 1313 + }, + { + "epoch": 0.12289979875508962, + "loss": 1.2679071426391602, + "loss_ce": 0.0027704723179340363, + "loss_iou": 0.55859375, + "loss_num": 0.0303955078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 86966816, + "step": 1313 + }, + { + "epoch": 0.12299340103898534, + "grad_norm": 19.72821044921875, + "learning_rate": 5e-05, + "loss": 1.5978, + "num_input_tokens_seen": 87033808, + "step": 1314 + }, + { + "epoch": 0.12299340103898534, + "loss": 1.5294857025146484, + "loss_ce": 0.0026302344631403685, + "loss_iou": 0.625, + "loss_num": 0.054931640625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 87033808, + "step": 1314 + }, + { + "epoch": 0.12308700332288108, + "grad_norm": 15.73732852935791, + "learning_rate": 5e-05, + "loss": 1.4708, + "num_input_tokens_seen": 87100612, + "step": 1315 + }, + { + "epoch": 0.12308700332288108, + "loss": 1.5097014904022217, + "loss_ce": 0.004330409690737724, + "loss_iou": 0.6171875, + "loss_num": 0.05517578125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 87100612, + "step": 1315 + }, + { + "epoch": 0.12318060560677681, + "grad_norm": 20.726572036743164, + "learning_rate": 5e-05, + "loss": 1.6119, + "num_input_tokens_seen": 87166924, + "step": 1316 + }, + { + "epoch": 0.12318060560677681, + "loss": 1.7098352909088135, + "loss_ce": 0.0008508390747010708, + "loss_iou": 0.73828125, + "loss_num": 0.046630859375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 87166924, + "step": 1316 + }, + { + "epoch": 0.12327420789067253, + "grad_norm": 31.550519943237305, + "learning_rate": 5e-05, + "loss": 1.6269, + "num_input_tokens_seen": 87233908, + "step": 1317 + }, + { + "epoch": 0.12327420789067253, + "loss": 1.921507716178894, + "loss_ce": 0.0015858153346925974, + "loss_iou": 0.79296875, + "loss_num": 0.06591796875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 87233908, + "step": 1317 + }, + { + "epoch": 0.12336781017456826, + "grad_norm": 19.12386703491211, + "learning_rate": 5e-05, + "loss": 1.8412, + "num_input_tokens_seen": 87300564, + "step": 1318 + }, + { + "epoch": 0.12336781017456826, + "loss": 1.8357542753219604, + "loss_ce": 0.011535624042153358, + "loss_iou": 0.71484375, + "loss_num": 0.07861328125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 87300564, + "step": 1318 + }, + { + "epoch": 0.12346141245846398, + "grad_norm": 21.08283805847168, + "learning_rate": 5e-05, + "loss": 1.6531, + "num_input_tokens_seen": 87368060, + "step": 1319 + }, + { + "epoch": 0.12346141245846398, + "loss": 1.6483509540557861, + "loss_ce": 0.0047962842509150505, + "loss_iou": 0.71875, + "loss_num": 0.04150390625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 87368060, + "step": 1319 + }, + { + "epoch": 0.12355501474235972, + "grad_norm": 75.24449920654297, + "learning_rate": 5e-05, + "loss": 1.5335, + "num_input_tokens_seen": 87433764, + "step": 1320 + }, + { + "epoch": 0.12355501474235972, + "loss": 1.4258882999420166, + "loss_ce": 0.004013323690742254, + "loss_iou": 0.609375, + "loss_num": 0.041015625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 87433764, + "step": 1320 + }, + { + "epoch": 0.12364861702625544, + "grad_norm": 24.328025817871094, + "learning_rate": 5e-05, + "loss": 1.5936, + "num_input_tokens_seen": 87499040, + "step": 1321 + }, + { + "epoch": 0.12364861702625544, + "loss": 1.732417345046997, + "loss_ce": 0.0058548226952552795, + "loss_iou": 0.70703125, + "loss_num": 0.061767578125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 87499040, + "step": 1321 + }, + { + "epoch": 0.12374221931015117, + "grad_norm": 22.555723190307617, + "learning_rate": 5e-05, + "loss": 1.438, + "num_input_tokens_seen": 87565064, + "step": 1322 + }, + { + "epoch": 0.12374221931015117, + "loss": 1.4071223735809326, + "loss_ce": 0.0023371789138764143, + "loss_iou": 0.61328125, + "loss_num": 0.0361328125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 87565064, + "step": 1322 + }, + { + "epoch": 0.1238358215940469, + "grad_norm": 16.10475730895996, + "learning_rate": 5e-05, + "loss": 1.7528, + "num_input_tokens_seen": 87631036, + "step": 1323 + }, + { + "epoch": 0.1238358215940469, + "loss": 1.6155085563659668, + "loss_ce": 0.006133580580353737, + "loss_iou": 0.70703125, + "loss_num": 0.03955078125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 87631036, + "step": 1323 + }, + { + "epoch": 0.12392942387794262, + "grad_norm": 14.831561088562012, + "learning_rate": 5e-05, + "loss": 1.5513, + "num_input_tokens_seen": 87697400, + "step": 1324 + }, + { + "epoch": 0.12392942387794262, + "loss": 1.2704427242279053, + "loss_ce": 0.004817690700292587, + "loss_iou": 0.5546875, + "loss_num": 0.031494140625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 87697400, + "step": 1324 + }, + { + "epoch": 0.12402302616183834, + "grad_norm": 32.889522552490234, + "learning_rate": 5e-05, + "loss": 1.2469, + "num_input_tokens_seen": 87763576, + "step": 1325 + }, + { + "epoch": 0.12402302616183834, + "loss": 1.2827637195587158, + "loss_ce": 0.0044434089213609695, + "loss_iou": 0.5703125, + "loss_num": 0.0274658203125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 87763576, + "step": 1325 + }, + { + "epoch": 0.12411662844573408, + "grad_norm": 15.29900074005127, + "learning_rate": 5e-05, + "loss": 1.7251, + "num_input_tokens_seen": 87830268, + "step": 1326 + }, + { + "epoch": 0.12411662844573408, + "loss": 1.6326643228530884, + "loss_ce": 0.002781424205750227, + "loss_iou": 0.69921875, + "loss_num": 0.045654296875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 87830268, + "step": 1326 + }, + { + "epoch": 0.1242102307296298, + "grad_norm": 44.64360427856445, + "learning_rate": 5e-05, + "loss": 1.5381, + "num_input_tokens_seen": 87896616, + "step": 1327 + }, + { + "epoch": 0.1242102307296298, + "loss": 1.5737522840499878, + "loss_ce": 0.008322535082697868, + "loss_iou": 0.65625, + "loss_num": 0.0517578125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 87896616, + "step": 1327 + }, + { + "epoch": 0.12430383301352553, + "grad_norm": 113.04426574707031, + "learning_rate": 5e-05, + "loss": 1.5927, + "num_input_tokens_seen": 87962076, + "step": 1328 + }, + { + "epoch": 0.12430383301352553, + "loss": 1.9099640846252441, + "loss_ce": 0.00859688688069582, + "loss_iou": 0.80078125, + "loss_num": 0.060546875, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 87962076, + "step": 1328 + }, + { + "epoch": 0.12439743529742125, + "grad_norm": 16.400493621826172, + "learning_rate": 5e-05, + "loss": 1.5002, + "num_input_tokens_seen": 88029696, + "step": 1329 + }, + { + "epoch": 0.12439743529742125, + "loss": 1.7915959358215332, + "loss_ce": 0.005463153589516878, + "loss_iou": 0.765625, + "loss_num": 0.05126953125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 88029696, + "step": 1329 + }, + { + "epoch": 0.12449103758131698, + "grad_norm": 18.444204330444336, + "learning_rate": 5e-05, + "loss": 1.3574, + "num_input_tokens_seen": 88096584, + "step": 1330 + }, + { + "epoch": 0.12449103758131698, + "loss": 1.3137296438217163, + "loss_ce": 0.006112488452345133, + "loss_iou": 0.5703125, + "loss_num": 0.0341796875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 88096584, + "step": 1330 + }, + { + "epoch": 0.12458463986521272, + "grad_norm": 50.89994430541992, + "learning_rate": 5e-05, + "loss": 1.9221, + "num_input_tokens_seen": 88163308, + "step": 1331 + }, + { + "epoch": 0.12458463986521272, + "loss": 1.9844614267349243, + "loss_ce": 0.004969221539795399, + "loss_iou": 0.83984375, + "loss_num": 0.060546875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 88163308, + "step": 1331 + }, + { + "epoch": 0.12467824214910844, + "grad_norm": 16.569965362548828, + "learning_rate": 5e-05, + "loss": 1.6601, + "num_input_tokens_seen": 88229560, + "step": 1332 + }, + { + "epoch": 0.12467824214910844, + "loss": 1.6772743463516235, + "loss_ce": 0.003446259070187807, + "loss_iou": 0.73828125, + "loss_num": 0.038818359375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 88229560, + "step": 1332 + }, + { + "epoch": 0.12477184443300417, + "grad_norm": 21.92986297607422, + "learning_rate": 5e-05, + "loss": 1.5063, + "num_input_tokens_seen": 88296360, + "step": 1333 + }, + { + "epoch": 0.12477184443300417, + "loss": 1.5332324504852295, + "loss_ce": 0.004667984321713448, + "loss_iou": 0.6328125, + "loss_num": 0.05224609375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 88296360, + "step": 1333 + }, + { + "epoch": 0.12486544671689989, + "grad_norm": 19.285966873168945, + "learning_rate": 5e-05, + "loss": 1.2684, + "num_input_tokens_seen": 88362000, + "step": 1334 + }, + { + "epoch": 0.12486544671689989, + "loss": 1.2381023168563843, + "loss_ce": 0.005680470261722803, + "loss_iou": 0.515625, + "loss_num": 0.040771484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 88362000, + "step": 1334 + }, + { + "epoch": 0.12495904900079562, + "grad_norm": 27.227636337280273, + "learning_rate": 5e-05, + "loss": 1.6714, + "num_input_tokens_seen": 88428384, + "step": 1335 + }, + { + "epoch": 0.12495904900079562, + "loss": 1.6423685550689697, + "loss_ce": 0.00564984604716301, + "loss_iou": 0.70703125, + "loss_num": 0.044677734375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 88428384, + "step": 1335 + }, + { + "epoch": 0.12505265128469134, + "grad_norm": 23.74907684326172, + "learning_rate": 5e-05, + "loss": 1.6194, + "num_input_tokens_seen": 88495332, + "step": 1336 + }, + { + "epoch": 0.12505265128469134, + "loss": 1.825437307357788, + "loss_ce": 0.00707800779491663, + "loss_iou": 0.7265625, + "loss_num": 0.0732421875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 88495332, + "step": 1336 + }, + { + "epoch": 0.12514625356858708, + "grad_norm": 16.62088394165039, + "learning_rate": 5e-05, + "loss": 1.485, + "num_input_tokens_seen": 88562248, + "step": 1337 + }, + { + "epoch": 0.12514625356858708, + "loss": 1.620102047920227, + "loss_ce": 0.0038911611773073673, + "loss_iou": 0.65625, + "loss_num": 0.061279296875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 88562248, + "step": 1337 + }, + { + "epoch": 0.1252398558524828, + "grad_norm": 40.16783142089844, + "learning_rate": 5e-05, + "loss": 1.3248, + "num_input_tokens_seen": 88629452, + "step": 1338 + }, + { + "epoch": 0.1252398558524828, + "loss": 1.24601411819458, + "loss_ce": 0.002849954180419445, + "loss_iou": 0.54296875, + "loss_num": 0.0311279296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 88629452, + "step": 1338 + }, + { + "epoch": 0.12533345813637853, + "grad_norm": 19.44478988647461, + "learning_rate": 5e-05, + "loss": 1.4632, + "num_input_tokens_seen": 88697664, + "step": 1339 + }, + { + "epoch": 0.12533345813637853, + "loss": 1.550034999847412, + "loss_ce": 0.0012068809010088444, + "loss_iou": 0.640625, + "loss_num": 0.05419921875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 88697664, + "step": 1339 + }, + { + "epoch": 0.12542706042027427, + "grad_norm": 15.983454704284668, + "learning_rate": 5e-05, + "loss": 1.4638, + "num_input_tokens_seen": 88763948, + "step": 1340 + }, + { + "epoch": 0.12542706042027427, + "loss": 1.661836862564087, + "loss_ce": 0.007540082558989525, + "loss_iou": 0.703125, + "loss_num": 0.050048828125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 88763948, + "step": 1340 + }, + { + "epoch": 0.12552066270416998, + "grad_norm": 18.1462345123291, + "learning_rate": 5e-05, + "loss": 1.6568, + "num_input_tokens_seen": 88829860, + "step": 1341 + }, + { + "epoch": 0.12552066270416998, + "loss": 1.7739043235778809, + "loss_ce": 0.003396473592147231, + "loss_iou": 0.765625, + "loss_num": 0.048095703125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 88829860, + "step": 1341 + }, + { + "epoch": 0.12561426498806572, + "grad_norm": 23.744892120361328, + "learning_rate": 5e-05, + "loss": 1.5271, + "num_input_tokens_seen": 88895908, + "step": 1342 + }, + { + "epoch": 0.12561426498806572, + "loss": 1.634044885635376, + "loss_ce": 0.009044930338859558, + "loss_iou": 0.6640625, + "loss_num": 0.0595703125, + "loss_xval": 1.625, + "num_input_tokens_seen": 88895908, + "step": 1342 + }, + { + "epoch": 0.12570786727196143, + "grad_norm": 47.277591705322266, + "learning_rate": 5e-05, + "loss": 1.4038, + "num_input_tokens_seen": 88962172, + "step": 1343 + }, + { + "epoch": 0.12570786727196143, + "loss": 1.409563660621643, + "loss_ce": 0.008196476846933365, + "loss_iou": 0.63671875, + "loss_num": 0.02587890625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 88962172, + "step": 1343 + }, + { + "epoch": 0.12580146955585716, + "grad_norm": 39.08584976196289, + "learning_rate": 5e-05, + "loss": 1.5426, + "num_input_tokens_seen": 89027524, + "step": 1344 + }, + { + "epoch": 0.12580146955585716, + "loss": 1.649590253829956, + "loss_ce": 0.007988580502569675, + "loss_iou": 0.74609375, + "loss_num": 0.03076171875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 89027524, + "step": 1344 + }, + { + "epoch": 0.1258950718397529, + "grad_norm": 16.79268455505371, + "learning_rate": 5e-05, + "loss": 1.8017, + "num_input_tokens_seen": 89092648, + "step": 1345 + }, + { + "epoch": 0.1258950718397529, + "loss": 1.7879549264907837, + "loss_ce": 0.003775215707719326, + "loss_iou": 0.7734375, + "loss_num": 0.0478515625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 89092648, + "step": 1345 + }, + { + "epoch": 0.12598867412364861, + "grad_norm": 21.5550537109375, + "learning_rate": 5e-05, + "loss": 1.4655, + "num_input_tokens_seen": 89159212, + "step": 1346 + }, + { + "epoch": 0.12598867412364861, + "loss": 1.4311448335647583, + "loss_ce": 0.005363560281693935, + "loss_iou": 0.62109375, + "loss_num": 0.036376953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 89159212, + "step": 1346 + }, + { + "epoch": 0.12608227640754435, + "grad_norm": 19.53887939453125, + "learning_rate": 5e-05, + "loss": 1.5629, + "num_input_tokens_seen": 89225420, + "step": 1347 + }, + { + "epoch": 0.12608227640754435, + "loss": 1.3539124727249146, + "loss_ce": 0.002349911257624626, + "loss_iou": 0.57421875, + "loss_num": 0.040283203125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 89225420, + "step": 1347 + }, + { + "epoch": 0.12617587869144006, + "grad_norm": 18.233774185180664, + "learning_rate": 5e-05, + "loss": 1.6367, + "num_input_tokens_seen": 89291752, + "step": 1348 + }, + { + "epoch": 0.12617587869144006, + "loss": 1.6218806505203247, + "loss_ce": 0.0027399638202041388, + "loss_iou": 0.68359375, + "loss_num": 0.05029296875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 89291752, + "step": 1348 + }, + { + "epoch": 0.1262694809753358, + "grad_norm": 56.429054260253906, + "learning_rate": 5e-05, + "loss": 1.6564, + "num_input_tokens_seen": 89359116, + "step": 1349 + }, + { + "epoch": 0.1262694809753358, + "loss": 1.5903823375701904, + "loss_ce": 0.0034682718105614185, + "loss_iou": 0.71875, + "loss_num": 0.0296630859375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 89359116, + "step": 1349 + }, + { + "epoch": 0.1263630832592315, + "grad_norm": 15.590723037719727, + "learning_rate": 5e-05, + "loss": 1.5935, + "num_input_tokens_seen": 89424872, + "step": 1350 + }, + { + "epoch": 0.1263630832592315, + "loss": 1.5478242635726929, + "loss_ce": 0.00851763878017664, + "loss_iou": 0.6484375, + "loss_num": 0.048828125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 89424872, + "step": 1350 + }, + { + "epoch": 0.12645668554312725, + "grad_norm": 13.77322769165039, + "learning_rate": 5e-05, + "loss": 1.4238, + "num_input_tokens_seen": 89492072, + "step": 1351 + }, + { + "epoch": 0.12645668554312725, + "loss": 1.4697320461273193, + "loss_ce": 0.0034233955666422844, + "loss_iou": 0.625, + "loss_num": 0.0439453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 89492072, + "step": 1351 + }, + { + "epoch": 0.126550287827023, + "grad_norm": 13.505005836486816, + "learning_rate": 5e-05, + "loss": 1.5099, + "num_input_tokens_seen": 89558472, + "step": 1352 + }, + { + "epoch": 0.126550287827023, + "loss": 1.5141277313232422, + "loss_ce": 0.0014323859941214323, + "loss_iou": 0.66796875, + "loss_num": 0.03466796875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 89558472, + "step": 1352 + }, + { + "epoch": 0.1266438901109187, + "grad_norm": 16.373979568481445, + "learning_rate": 5e-05, + "loss": 1.3335, + "num_input_tokens_seen": 89623992, + "step": 1353 + }, + { + "epoch": 0.1266438901109187, + "loss": 1.3113393783569336, + "loss_ce": 0.004363037645816803, + "loss_iou": 0.5546875, + "loss_num": 0.03955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 89623992, + "step": 1353 + }, + { + "epoch": 0.12673749239481444, + "grad_norm": 20.523517608642578, + "learning_rate": 5e-05, + "loss": 1.7889, + "num_input_tokens_seen": 89690240, + "step": 1354 + }, + { + "epoch": 0.12673749239481444, + "loss": 1.754321813583374, + "loss_ce": 0.009204620495438576, + "loss_iou": 0.73046875, + "loss_num": 0.05712890625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 89690240, + "step": 1354 + }, + { + "epoch": 0.12683109467871015, + "grad_norm": 19.48271942138672, + "learning_rate": 5e-05, + "loss": 1.7846, + "num_input_tokens_seen": 89756784, + "step": 1355 + }, + { + "epoch": 0.12683109467871015, + "loss": 1.9620779752731323, + "loss_ce": 0.005046608857810497, + "loss_iou": 0.8359375, + "loss_num": 0.056884765625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 89756784, + "step": 1355 + }, + { + "epoch": 0.1269246969626059, + "grad_norm": 45.645416259765625, + "learning_rate": 5e-05, + "loss": 1.631, + "num_input_tokens_seen": 89822608, + "step": 1356 + }, + { + "epoch": 0.1269246969626059, + "loss": 1.6072031259536743, + "loss_ce": 0.004175771027803421, + "loss_iou": 0.66015625, + "loss_num": 0.057373046875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 89822608, + "step": 1356 + }, + { + "epoch": 0.12701829924650163, + "grad_norm": 248.37985229492188, + "learning_rate": 5e-05, + "loss": 1.8808, + "num_input_tokens_seen": 89889216, + "step": 1357 + }, + { + "epoch": 0.12701829924650163, + "loss": 1.7766605615615845, + "loss_ce": 0.0051762256771326065, + "loss_iou": 0.76953125, + "loss_num": 0.046630859375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 89889216, + "step": 1357 + }, + { + "epoch": 0.12711190153039734, + "grad_norm": 22.761634826660156, + "learning_rate": 5e-05, + "loss": 1.5701, + "num_input_tokens_seen": 89955564, + "step": 1358 + }, + { + "epoch": 0.12711190153039734, + "loss": 1.3086109161376953, + "loss_ce": 0.0034352345392107964, + "loss_iou": 0.5546875, + "loss_num": 0.039794921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 89955564, + "step": 1358 + }, + { + "epoch": 0.12720550381429307, + "grad_norm": 28.448043823242188, + "learning_rate": 5e-05, + "loss": 1.3864, + "num_input_tokens_seen": 90020936, + "step": 1359 + }, + { + "epoch": 0.12720550381429307, + "loss": 1.394110918045044, + "loss_ce": 0.0034249001182615757, + "loss_iou": 0.6015625, + "loss_num": 0.037841796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 90020936, + "step": 1359 + }, + { + "epoch": 0.12729910609818879, + "grad_norm": 17.549392700195312, + "learning_rate": 5e-05, + "loss": 1.5986, + "num_input_tokens_seen": 90086248, + "step": 1360 + }, + { + "epoch": 0.12729910609818879, + "loss": 1.6289432048797607, + "loss_ce": 0.007849406450986862, + "loss_iou": 0.68359375, + "loss_num": 0.0517578125, + "loss_xval": 1.625, + "num_input_tokens_seen": 90086248, + "step": 1360 + }, + { + "epoch": 0.12739270838208452, + "grad_norm": 11.626138687133789, + "learning_rate": 5e-05, + "loss": 1.471, + "num_input_tokens_seen": 90153580, + "step": 1361 + }, + { + "epoch": 0.12739270838208452, + "loss": 1.4268728494644165, + "loss_ce": 0.006950935814529657, + "loss_iou": 0.59765625, + "loss_num": 0.044189453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 90153580, + "step": 1361 + }, + { + "epoch": 0.12748631066598026, + "grad_norm": 154.85272216796875, + "learning_rate": 5e-05, + "loss": 1.3774, + "num_input_tokens_seen": 90220064, + "step": 1362 + }, + { + "epoch": 0.12748631066598026, + "loss": 1.483375072479248, + "loss_ce": 0.0038829362019896507, + "loss_iou": 0.61328125, + "loss_num": 0.05126953125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 90220064, + "step": 1362 + }, + { + "epoch": 0.12757991294987597, + "grad_norm": 25.236352920532227, + "learning_rate": 5e-05, + "loss": 1.4006, + "num_input_tokens_seen": 90286288, + "step": 1363 + }, + { + "epoch": 0.12757991294987597, + "loss": 1.4804292917251587, + "loss_ce": 0.004599159583449364, + "loss_iou": 0.61328125, + "loss_num": 0.049560546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 90286288, + "step": 1363 + }, + { + "epoch": 0.1276735152337717, + "grad_norm": 35.36727523803711, + "learning_rate": 5e-05, + "loss": 1.5822, + "num_input_tokens_seen": 90352244, + "step": 1364 + }, + { + "epoch": 0.1276735152337717, + "loss": 1.286836862564087, + "loss_ce": 0.0072957719676196575, + "loss_iou": 0.5390625, + "loss_num": 0.0400390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 90352244, + "step": 1364 + }, + { + "epoch": 0.12776711751766742, + "grad_norm": 17.38477897644043, + "learning_rate": 5e-05, + "loss": 1.6278, + "num_input_tokens_seen": 90418872, + "step": 1365 + }, + { + "epoch": 0.12776711751766742, + "loss": 1.782911777496338, + "loss_ce": 0.003614937188103795, + "loss_iou": 0.7890625, + "loss_num": 0.04052734375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 90418872, + "step": 1365 + }, + { + "epoch": 0.12786071980156316, + "grad_norm": 85.57073974609375, + "learning_rate": 5e-05, + "loss": 1.4069, + "num_input_tokens_seen": 90485600, + "step": 1366 + }, + { + "epoch": 0.12786071980156316, + "loss": 1.5200433731079102, + "loss_ce": 0.001488764537498355, + "loss_iou": 0.66015625, + "loss_num": 0.0400390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 90485600, + "step": 1366 + }, + { + "epoch": 0.12795432208545887, + "grad_norm": 21.695579528808594, + "learning_rate": 5e-05, + "loss": 1.5567, + "num_input_tokens_seen": 90552584, + "step": 1367 + }, + { + "epoch": 0.12795432208545887, + "loss": 1.4326601028442383, + "loss_ce": 0.0010194204514846206, + "loss_iou": 0.63671875, + "loss_num": 0.03125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 90552584, + "step": 1367 + }, + { + "epoch": 0.1280479243693546, + "grad_norm": 23.40521812438965, + "learning_rate": 5e-05, + "loss": 1.5361, + "num_input_tokens_seen": 90618684, + "step": 1368 + }, + { + "epoch": 0.1280479243693546, + "loss": 1.5466312170028687, + "loss_ce": 0.0095218475908041, + "loss_iou": 0.6953125, + "loss_num": 0.0284423828125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 90618684, + "step": 1368 + }, + { + "epoch": 0.12814152665325035, + "grad_norm": 18.414335250854492, + "learning_rate": 5e-05, + "loss": 1.5375, + "num_input_tokens_seen": 90684404, + "step": 1369 + }, + { + "epoch": 0.12814152665325035, + "loss": 1.5048866271972656, + "loss_ce": 0.0058631375432014465, + "loss_iou": 0.62890625, + "loss_num": 0.04833984375, + "loss_xval": 1.5, + "num_input_tokens_seen": 90684404, + "step": 1369 + }, + { + "epoch": 0.12823512893714606, + "grad_norm": 17.33824920654297, + "learning_rate": 5e-05, + "loss": 1.2944, + "num_input_tokens_seen": 90751324, + "step": 1370 + }, + { + "epoch": 0.12823512893714606, + "loss": 1.223867416381836, + "loss_ce": 0.004629106260836124, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 90751324, + "step": 1370 + }, + { + "epoch": 0.1283287312210418, + "grad_norm": 25.81096649169922, + "learning_rate": 5e-05, + "loss": 1.5307, + "num_input_tokens_seen": 90818100, + "step": 1371 + }, + { + "epoch": 0.1283287312210418, + "loss": 1.6869189739227295, + "loss_ce": 0.009184468537569046, + "loss_iou": 0.6875, + "loss_num": 0.060791015625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 90818100, + "step": 1371 + }, + { + "epoch": 0.1284223335049375, + "grad_norm": 18.021596908569336, + "learning_rate": 5e-05, + "loss": 1.3482, + "num_input_tokens_seen": 90884120, + "step": 1372 + }, + { + "epoch": 0.1284223335049375, + "loss": 1.5917011499404907, + "loss_ce": 0.0057636527344584465, + "loss_iou": 0.6640625, + "loss_num": 0.052734375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 90884120, + "step": 1372 + }, + { + "epoch": 0.12851593578883325, + "grad_norm": 27.08411979675293, + "learning_rate": 5e-05, + "loss": 1.4109, + "num_input_tokens_seen": 90948716, + "step": 1373 + }, + { + "epoch": 0.12851593578883325, + "loss": 1.512143611907959, + "loss_ce": 0.002378041623160243, + "loss_iou": 0.62890625, + "loss_num": 0.050537109375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 90948716, + "step": 1373 + }, + { + "epoch": 0.12860953807272898, + "grad_norm": 23.115093231201172, + "learning_rate": 5e-05, + "loss": 1.1726, + "num_input_tokens_seen": 91015700, + "step": 1374 + }, + { + "epoch": 0.12860953807272898, + "loss": 1.2207486629486084, + "loss_ce": 0.004928313195705414, + "loss_iou": 0.53515625, + "loss_num": 0.02880859375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 91015700, + "step": 1374 + }, + { + "epoch": 0.1287031403566247, + "grad_norm": 20.75998878479004, + "learning_rate": 5e-05, + "loss": 1.4698, + "num_input_tokens_seen": 91081828, + "step": 1375 + }, + { + "epoch": 0.1287031403566247, + "loss": 1.5335545539855957, + "loss_ce": 0.007675648666918278, + "loss_iou": 0.65625, + "loss_num": 0.042724609375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 91081828, + "step": 1375 + }, + { + "epoch": 0.12879674264052043, + "grad_norm": 23.59589958190918, + "learning_rate": 5e-05, + "loss": 1.3764, + "num_input_tokens_seen": 91147524, + "step": 1376 + }, + { + "epoch": 0.12879674264052043, + "loss": 1.2893974781036377, + "loss_ce": 0.0052177440375089645, + "loss_iou": 0.5703125, + "loss_num": 0.028076171875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 91147524, + "step": 1376 + }, + { + "epoch": 0.12889034492441614, + "grad_norm": 30.19612693786621, + "learning_rate": 5e-05, + "loss": 1.5341, + "num_input_tokens_seen": 91212540, + "step": 1377 + }, + { + "epoch": 0.12889034492441614, + "loss": 1.6309689283370972, + "loss_ce": 0.004015746992081404, + "loss_iou": 0.671875, + "loss_num": 0.05712890625, + "loss_xval": 1.625, + "num_input_tokens_seen": 91212540, + "step": 1377 + }, + { + "epoch": 0.12898394720831188, + "grad_norm": 16.537076950073242, + "learning_rate": 5e-05, + "loss": 1.7526, + "num_input_tokens_seen": 91278960, + "step": 1378 + }, + { + "epoch": 0.12898394720831188, + "loss": 1.9375122785568237, + "loss_ce": 0.005871674977242947, + "loss_iou": 0.80078125, + "loss_num": 0.06591796875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 91278960, + "step": 1378 + }, + { + "epoch": 0.12907754949220762, + "grad_norm": 14.124958038330078, + "learning_rate": 5e-05, + "loss": 1.3359, + "num_input_tokens_seen": 91346708, + "step": 1379 + }, + { + "epoch": 0.12907754949220762, + "loss": 1.371921181678772, + "loss_ce": 0.004733636975288391, + "loss_iou": 0.57421875, + "loss_num": 0.0439453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 91346708, + "step": 1379 + }, + { + "epoch": 0.12917115177610333, + "grad_norm": 21.393112182617188, + "learning_rate": 5e-05, + "loss": 1.5838, + "num_input_tokens_seen": 91412816, + "step": 1380 + }, + { + "epoch": 0.12917115177610333, + "loss": 1.4501063823699951, + "loss_ce": 0.0008876234060153365, + "loss_iou": 0.5859375, + "loss_num": 0.0556640625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 91412816, + "step": 1380 + }, + { + "epoch": 0.12926475405999907, + "grad_norm": 10.855667114257812, + "learning_rate": 5e-05, + "loss": 1.3396, + "num_input_tokens_seen": 91479152, + "step": 1381 + }, + { + "epoch": 0.12926475405999907, + "loss": 1.5468095541000366, + "loss_ce": 0.0058245365507900715, + "loss_iou": 0.671875, + "loss_num": 0.039794921875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 91479152, + "step": 1381 + }, + { + "epoch": 0.12935835634389478, + "grad_norm": 10.485991477966309, + "learning_rate": 5e-05, + "loss": 1.1981, + "num_input_tokens_seen": 91544272, + "step": 1382 + }, + { + "epoch": 0.12935835634389478, + "loss": 1.0928484201431274, + "loss_ce": 0.004713610280305147, + "loss_iou": 0.4296875, + "loss_num": 0.045654296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 91544272, + "step": 1382 + }, + { + "epoch": 0.12945195862779052, + "grad_norm": 30.87342643737793, + "learning_rate": 5e-05, + "loss": 1.5138, + "num_input_tokens_seen": 91611320, + "step": 1383 + }, + { + "epoch": 0.12945195862779052, + "loss": 1.6626055240631104, + "loss_ce": 0.003425776492804289, + "loss_iou": 0.734375, + "loss_num": 0.03857421875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 91611320, + "step": 1383 + }, + { + "epoch": 0.12954556091168626, + "grad_norm": 40.892181396484375, + "learning_rate": 5e-05, + "loss": 1.7905, + "num_input_tokens_seen": 91677232, + "step": 1384 + }, + { + "epoch": 0.12954556091168626, + "loss": 1.8152230978012085, + "loss_ce": 0.001746569061651826, + "loss_iou": 0.80859375, + "loss_num": 0.038330078125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 91677232, + "step": 1384 + }, + { + "epoch": 0.12963916319558197, + "grad_norm": 17.921037673950195, + "learning_rate": 5e-05, + "loss": 1.592, + "num_input_tokens_seen": 91743024, + "step": 1385 + }, + { + "epoch": 0.12963916319558197, + "loss": 1.8663153648376465, + "loss_ce": 0.0030340198427438736, + "loss_iou": 0.796875, + "loss_num": 0.053466796875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 91743024, + "step": 1385 + }, + { + "epoch": 0.1297327654794777, + "grad_norm": 23.16750717163086, + "learning_rate": 5e-05, + "loss": 1.4521, + "num_input_tokens_seen": 91810484, + "step": 1386 + }, + { + "epoch": 0.1297327654794777, + "loss": 1.458481788635254, + "loss_ce": 0.0043802387081086636, + "loss_iou": 0.65625, + "loss_num": 0.0289306640625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 91810484, + "step": 1386 + }, + { + "epoch": 0.12982636776337342, + "grad_norm": 111.02323150634766, + "learning_rate": 5e-05, + "loss": 1.4423, + "num_input_tokens_seen": 91876592, + "step": 1387 + }, + { + "epoch": 0.12982636776337342, + "loss": 1.5060733556747437, + "loss_ce": 0.007049937732517719, + "loss_iou": 0.609375, + "loss_num": 0.055908203125, + "loss_xval": 1.5, + "num_input_tokens_seen": 91876592, + "step": 1387 + }, + { + "epoch": 0.12991997004726916, + "grad_norm": 19.789958953857422, + "learning_rate": 5e-05, + "loss": 1.6455, + "num_input_tokens_seen": 91943344, + "step": 1388 + }, + { + "epoch": 0.12991997004726916, + "loss": 1.7664657831192017, + "loss_ce": 0.0042588477954268456, + "loss_iou": 0.76171875, + "loss_num": 0.046630859375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 91943344, + "step": 1388 + }, + { + "epoch": 0.13001357233116487, + "grad_norm": 12.905505180358887, + "learning_rate": 5e-05, + "loss": 1.4612, + "num_input_tokens_seen": 92008716, + "step": 1389 + }, + { + "epoch": 0.13001357233116487, + "loss": 1.1903969049453735, + "loss_ce": 0.0033241792116314173, + "loss_iou": 0.48046875, + "loss_num": 0.044921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 92008716, + "step": 1389 + }, + { + "epoch": 0.1301071746150606, + "grad_norm": 103.31057739257812, + "learning_rate": 5e-05, + "loss": 1.2876, + "num_input_tokens_seen": 92074136, + "step": 1390 + }, + { + "epoch": 0.1301071746150606, + "loss": 1.2002911567687988, + "loss_ce": 0.007023334503173828, + "loss_iou": 0.50390625, + "loss_num": 0.03662109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 92074136, + "step": 1390 + }, + { + "epoch": 0.13020077689895634, + "grad_norm": 19.765432357788086, + "learning_rate": 5e-05, + "loss": 1.4414, + "num_input_tokens_seen": 92141644, + "step": 1391 + }, + { + "epoch": 0.13020077689895634, + "loss": 1.5639369487762451, + "loss_ce": 0.0019252786878496408, + "loss_iou": 0.68359375, + "loss_num": 0.039306640625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 92141644, + "step": 1391 + }, + { + "epoch": 0.13029437918285205, + "grad_norm": 27.777828216552734, + "learning_rate": 5e-05, + "loss": 1.5448, + "num_input_tokens_seen": 92207420, + "step": 1392 + }, + { + "epoch": 0.13029437918285205, + "loss": 1.5984091758728027, + "loss_ce": 0.0041709281504154205, + "loss_iou": 0.6484375, + "loss_num": 0.06005859375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 92207420, + "step": 1392 + }, + { + "epoch": 0.1303879814667478, + "grad_norm": 25.60963249206543, + "learning_rate": 5e-05, + "loss": 1.8416, + "num_input_tokens_seen": 92271428, + "step": 1393 + }, + { + "epoch": 0.1303879814667478, + "loss": 2.0645360946655273, + "loss_ce": 0.007895649410784245, + "loss_iou": 0.8203125, + "loss_num": 0.0830078125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 92271428, + "step": 1393 + }, + { + "epoch": 0.1304815837506435, + "grad_norm": 21.06133460998535, + "learning_rate": 5e-05, + "loss": 1.4674, + "num_input_tokens_seen": 92338340, + "step": 1394 + }, + { + "epoch": 0.1304815837506435, + "loss": 1.5114936828613281, + "loss_ce": 0.005634224973618984, + "loss_iou": 0.65234375, + "loss_num": 0.040771484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 92338340, + "step": 1394 + }, + { + "epoch": 0.13057518603453924, + "grad_norm": 11.62189769744873, + "learning_rate": 5e-05, + "loss": 1.3301, + "num_input_tokens_seen": 92405116, + "step": 1395 + }, + { + "epoch": 0.13057518603453924, + "loss": 1.341535210609436, + "loss_ce": 0.005109419114887714, + "loss_iou": 0.58203125, + "loss_num": 0.034423828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 92405116, + "step": 1395 + }, + { + "epoch": 0.13066878831843498, + "grad_norm": 14.54174518585205, + "learning_rate": 5e-05, + "loss": 1.3758, + "num_input_tokens_seen": 92472160, + "step": 1396 + }, + { + "epoch": 0.13066878831843498, + "loss": 1.4310237169265747, + "loss_ce": 0.001824527862481773, + "loss_iou": 0.62890625, + "loss_num": 0.034423828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 92472160, + "step": 1396 + }, + { + "epoch": 0.1307623906023307, + "grad_norm": 22.171239852905273, + "learning_rate": 5e-05, + "loss": 1.4602, + "num_input_tokens_seen": 92538712, + "step": 1397 + }, + { + "epoch": 0.1307623906023307, + "loss": 1.2587718963623047, + "loss_ce": 0.003522851038724184, + "loss_iou": 0.53515625, + "loss_num": 0.036865234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 92538712, + "step": 1397 + }, + { + "epoch": 0.13085599288622643, + "grad_norm": 40.621341705322266, + "learning_rate": 5e-05, + "loss": 1.6449, + "num_input_tokens_seen": 92605588, + "step": 1398 + }, + { + "epoch": 0.13085599288622643, + "loss": 1.5951776504516602, + "loss_ce": 0.002892507240176201, + "loss_iou": 0.68359375, + "loss_num": 0.045654296875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 92605588, + "step": 1398 + }, + { + "epoch": 0.13094959517012214, + "grad_norm": 19.85601043701172, + "learning_rate": 5e-05, + "loss": 1.9056, + "num_input_tokens_seen": 92672276, + "step": 1399 + }, + { + "epoch": 0.13094959517012214, + "loss": 1.9563559293746948, + "loss_ce": 0.0022544129751622677, + "loss_iou": 0.83984375, + "loss_num": 0.055419921875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 92672276, + "step": 1399 + }, + { + "epoch": 0.13104319745401788, + "grad_norm": 16.9284725189209, + "learning_rate": 5e-05, + "loss": 1.4498, + "num_input_tokens_seen": 92739320, + "step": 1400 + }, + { + "epoch": 0.13104319745401788, + "loss": 1.446366548538208, + "loss_ce": 0.00886652059853077, + "loss_iou": 0.6171875, + "loss_num": 0.04052734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 92739320, + "step": 1400 + }, + { + "epoch": 0.13113679973791362, + "grad_norm": 32.16905975341797, + "learning_rate": 5e-05, + "loss": 1.4314, + "num_input_tokens_seen": 92804600, + "step": 1401 + }, + { + "epoch": 0.13113679973791362, + "loss": 1.3611209392547607, + "loss_ce": 0.004675663076341152, + "loss_iou": 0.5859375, + "loss_num": 0.037841796875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 92804600, + "step": 1401 + }, + { + "epoch": 0.13123040202180933, + "grad_norm": 21.563016891479492, + "learning_rate": 5e-05, + "loss": 1.5172, + "num_input_tokens_seen": 92870192, + "step": 1402 + }, + { + "epoch": 0.13123040202180933, + "loss": 1.6784169673919678, + "loss_ce": 0.0065420567989349365, + "loss_iou": 0.7109375, + "loss_num": 0.0498046875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 92870192, + "step": 1402 + }, + { + "epoch": 0.13132400430570507, + "grad_norm": 28.80994987487793, + "learning_rate": 5e-05, + "loss": 1.6035, + "num_input_tokens_seen": 92935924, + "step": 1403 + }, + { + "epoch": 0.13132400430570507, + "loss": 1.8455475568771362, + "loss_ce": 0.007657048758119345, + "loss_iou": 0.7734375, + "loss_num": 0.05810546875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 92935924, + "step": 1403 + }, + { + "epoch": 0.13141760658960078, + "grad_norm": 16.39699935913086, + "learning_rate": 5e-05, + "loss": 1.7602, + "num_input_tokens_seen": 93002044, + "step": 1404 + }, + { + "epoch": 0.13141760658960078, + "loss": 1.8066574335098267, + "loss_ce": 0.003923080395907164, + "loss_iou": 0.796875, + "loss_num": 0.0419921875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 93002044, + "step": 1404 + }, + { + "epoch": 0.13151120887349652, + "grad_norm": 18.839136123657227, + "learning_rate": 5e-05, + "loss": 1.4374, + "num_input_tokens_seen": 93068748, + "step": 1405 + }, + { + "epoch": 0.13151120887349652, + "loss": 1.1774072647094727, + "loss_ce": 0.00455570500344038, + "loss_iou": 0.48828125, + "loss_num": 0.039306640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 93068748, + "step": 1405 + }, + { + "epoch": 0.13160481115739225, + "grad_norm": 36.0368537902832, + "learning_rate": 5e-05, + "loss": 1.4246, + "num_input_tokens_seen": 93135136, + "step": 1406 + }, + { + "epoch": 0.13160481115739225, + "loss": 1.6487808227539062, + "loss_ce": 0.0062027000822126865, + "loss_iou": 0.74609375, + "loss_num": 0.0301513671875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 93135136, + "step": 1406 + }, + { + "epoch": 0.13169841344128796, + "grad_norm": 17.659997940063477, + "learning_rate": 5e-05, + "loss": 1.7284, + "num_input_tokens_seen": 93201716, + "step": 1407 + }, + { + "epoch": 0.13169841344128796, + "loss": 1.6802024841308594, + "loss_ce": 0.0024681566283106804, + "loss_iou": 0.73046875, + "loss_num": 0.04345703125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 93201716, + "step": 1407 + }, + { + "epoch": 0.1317920157251837, + "grad_norm": 13.591567993164062, + "learning_rate": 5e-05, + "loss": 1.5516, + "num_input_tokens_seen": 93268212, + "step": 1408 + }, + { + "epoch": 0.1317920157251837, + "loss": 1.7014623880386353, + "loss_ce": 0.004196710418909788, + "loss_iou": 0.6875, + "loss_num": 0.06396484375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 93268212, + "step": 1408 + }, + { + "epoch": 0.13188561800907941, + "grad_norm": 15.220724105834961, + "learning_rate": 5e-05, + "loss": 1.2826, + "num_input_tokens_seen": 93334480, + "step": 1409 + }, + { + "epoch": 0.13188561800907941, + "loss": 1.4379498958587646, + "loss_ce": 0.0024029668420553207, + "loss_iou": 0.6015625, + "loss_num": 0.04638671875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 93334480, + "step": 1409 + }, + { + "epoch": 0.13197922029297515, + "grad_norm": 24.42323875427246, + "learning_rate": 5e-05, + "loss": 1.5851, + "num_input_tokens_seen": 93400640, + "step": 1410 + }, + { + "epoch": 0.13197922029297515, + "loss": 1.6681182384490967, + "loss_ce": 0.006985319312661886, + "loss_iou": 0.69140625, + "loss_num": 0.05615234375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 93400640, + "step": 1410 + }, + { + "epoch": 0.13207282257687086, + "grad_norm": 35.02754211425781, + "learning_rate": 5e-05, + "loss": 1.4912, + "num_input_tokens_seen": 93466624, + "step": 1411 + }, + { + "epoch": 0.13207282257687086, + "loss": 1.493984341621399, + "loss_ce": 0.004726508166640997, + "loss_iou": 0.58984375, + "loss_num": 0.06201171875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 93466624, + "step": 1411 + }, + { + "epoch": 0.1321664248607666, + "grad_norm": 21.393375396728516, + "learning_rate": 5e-05, + "loss": 1.7033, + "num_input_tokens_seen": 93533320, + "step": 1412 + }, + { + "epoch": 0.1321664248607666, + "loss": 1.729456901550293, + "loss_ce": 0.0038708860520273447, + "loss_iou": 0.72265625, + "loss_num": 0.0556640625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 93533320, + "step": 1412 + }, + { + "epoch": 0.13226002714466234, + "grad_norm": 45.04379653930664, + "learning_rate": 5e-05, + "loss": 1.4752, + "num_input_tokens_seen": 93599720, + "step": 1413 + }, + { + "epoch": 0.13226002714466234, + "loss": 1.413474678993225, + "loss_ce": 0.0018535281997174025, + "loss_iou": 0.609375, + "loss_num": 0.03857421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 93599720, + "step": 1413 + }, + { + "epoch": 0.13235362942855805, + "grad_norm": 21.20207405090332, + "learning_rate": 5e-05, + "loss": 1.474, + "num_input_tokens_seen": 93665476, + "step": 1414 + }, + { + "epoch": 0.13235362942855805, + "loss": 1.4328948259353638, + "loss_ce": 0.004183912184089422, + "loss_iou": 0.56640625, + "loss_num": 0.0595703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 93665476, + "step": 1414 + }, + { + "epoch": 0.1324472317124538, + "grad_norm": 28.82359504699707, + "learning_rate": 5e-05, + "loss": 1.4343, + "num_input_tokens_seen": 93730960, + "step": 1415 + }, + { + "epoch": 0.1324472317124538, + "loss": 1.4721465110778809, + "loss_ce": 0.0024200051557272673, + "loss_iou": 0.6484375, + "loss_num": 0.033935546875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 93730960, + "step": 1415 + }, + { + "epoch": 0.1325408339963495, + "grad_norm": 12.276638984680176, + "learning_rate": 5e-05, + "loss": 1.4503, + "num_input_tokens_seen": 93797336, + "step": 1416 + }, + { + "epoch": 0.1325408339963495, + "loss": 1.6388862133026123, + "loss_ce": 0.004120668862015009, + "loss_iou": 0.6484375, + "loss_num": 0.06640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 93797336, + "step": 1416 + }, + { + "epoch": 0.13263443628024524, + "grad_norm": 18.934528350830078, + "learning_rate": 5e-05, + "loss": 1.4018, + "num_input_tokens_seen": 93863448, + "step": 1417 + }, + { + "epoch": 0.13263443628024524, + "loss": 1.3206698894500732, + "loss_ce": 0.0032871188595891, + "loss_iou": 0.54296875, + "loss_num": 0.0458984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 93863448, + "step": 1417 + }, + { + "epoch": 0.13272803856414098, + "grad_norm": 32.291786193847656, + "learning_rate": 5e-05, + "loss": 1.3848, + "num_input_tokens_seen": 93928764, + "step": 1418 + }, + { + "epoch": 0.13272803856414098, + "loss": 1.3497077226638794, + "loss_ce": 0.010352229699492455, + "loss_iou": 0.5859375, + "loss_num": 0.03369140625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 93928764, + "step": 1418 + }, + { + "epoch": 0.1328216408480367, + "grad_norm": 18.41583251953125, + "learning_rate": 5e-05, + "loss": 1.7208, + "num_input_tokens_seen": 93994328, + "step": 1419 + }, + { + "epoch": 0.1328216408480367, + "loss": 1.655555248260498, + "loss_ce": 0.004188154824078083, + "loss_iou": 0.71484375, + "loss_num": 0.044921875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 93994328, + "step": 1419 + }, + { + "epoch": 0.13291524313193243, + "grad_norm": 14.359472274780273, + "learning_rate": 5e-05, + "loss": 1.5621, + "num_input_tokens_seen": 94059920, + "step": 1420 + }, + { + "epoch": 0.13291524313193243, + "loss": 1.7808771133422852, + "loss_ce": 0.002556830644607544, + "loss_iou": 0.71875, + "loss_num": 0.06787109375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 94059920, + "step": 1420 + }, + { + "epoch": 0.13300884541582814, + "grad_norm": 23.846860885620117, + "learning_rate": 5e-05, + "loss": 1.3449, + "num_input_tokens_seen": 94125840, + "step": 1421 + }, + { + "epoch": 0.13300884541582814, + "loss": 1.4242160320281982, + "loss_ce": 0.005758992396295071, + "loss_iou": 0.6015625, + "loss_num": 0.043212890625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 94125840, + "step": 1421 + }, + { + "epoch": 0.13310244769972387, + "grad_norm": 38.345027923583984, + "learning_rate": 5e-05, + "loss": 1.6819, + "num_input_tokens_seen": 94191860, + "step": 1422 + }, + { + "epoch": 0.13310244769972387, + "loss": 1.6982989311218262, + "loss_ce": 0.00396299222484231, + "loss_iou": 0.71484375, + "loss_num": 0.052490234375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 94191860, + "step": 1422 + }, + { + "epoch": 0.1331960499836196, + "grad_norm": 30.62224578857422, + "learning_rate": 5e-05, + "loss": 1.4571, + "num_input_tokens_seen": 94258260, + "step": 1423 + }, + { + "epoch": 0.1331960499836196, + "loss": 1.4068955183029175, + "loss_ce": 0.004551771562546492, + "loss_iou": 0.59375, + "loss_num": 0.04248046875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 94258260, + "step": 1423 + }, + { + "epoch": 0.13328965226751532, + "grad_norm": 15.79391098022461, + "learning_rate": 5e-05, + "loss": 1.7965, + "num_input_tokens_seen": 94325912, + "step": 1424 + }, + { + "epoch": 0.13328965226751532, + "loss": 1.9111042022705078, + "loss_ce": 0.002901125233620405, + "loss_iou": 0.8203125, + "loss_num": 0.052734375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 94325912, + "step": 1424 + }, + { + "epoch": 0.13338325455141106, + "grad_norm": 16.25873565673828, + "learning_rate": 5e-05, + "loss": 1.5263, + "num_input_tokens_seen": 94391860, + "step": 1425 + }, + { + "epoch": 0.13338325455141106, + "loss": 1.6111969947814941, + "loss_ce": 0.0042633311823010445, + "loss_iou": 0.68359375, + "loss_num": 0.0478515625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 94391860, + "step": 1425 + }, + { + "epoch": 0.13347685683530677, + "grad_norm": 18.118350982666016, + "learning_rate": 5e-05, + "loss": 1.3529, + "num_input_tokens_seen": 94457948, + "step": 1426 + }, + { + "epoch": 0.13347685683530677, + "loss": 1.354447364807129, + "loss_ce": 0.0057229273952543736, + "loss_iou": 0.5625, + "loss_num": 0.0439453125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 94457948, + "step": 1426 + }, + { + "epoch": 0.1335704591192025, + "grad_norm": 19.742839813232422, + "learning_rate": 5e-05, + "loss": 1.4873, + "num_input_tokens_seen": 94525160, + "step": 1427 + }, + { + "epoch": 0.1335704591192025, + "loss": 1.3646737337112427, + "loss_ce": 0.004810418467968702, + "loss_iou": 0.54296875, + "loss_num": 0.05517578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 94525160, + "step": 1427 + }, + { + "epoch": 0.13366406140309822, + "grad_norm": 32.74245834350586, + "learning_rate": 5e-05, + "loss": 1.6196, + "num_input_tokens_seen": 94591060, + "step": 1428 + }, + { + "epoch": 0.13366406140309822, + "loss": 1.6130080223083496, + "loss_ce": 0.005586134735494852, + "loss_iou": 0.72265625, + "loss_num": 0.0322265625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 94591060, + "step": 1428 + }, + { + "epoch": 0.13375766368699396, + "grad_norm": 17.923078536987305, + "learning_rate": 5e-05, + "loss": 1.7979, + "num_input_tokens_seen": 94657624, + "step": 1429 + }, + { + "epoch": 0.13375766368699396, + "loss": 1.6485836505889893, + "loss_ce": 0.005029020830988884, + "loss_iou": 0.7265625, + "loss_num": 0.038818359375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 94657624, + "step": 1429 + }, + { + "epoch": 0.1338512659708897, + "grad_norm": 28.271013259887695, + "learning_rate": 5e-05, + "loss": 1.333, + "num_input_tokens_seen": 94722616, + "step": 1430 + }, + { + "epoch": 0.1338512659708897, + "loss": 1.2937275171279907, + "loss_ce": 0.010036170482635498, + "loss_iou": 0.5078125, + "loss_num": 0.0537109375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 94722616, + "step": 1430 + }, + { + "epoch": 0.1339448682547854, + "grad_norm": 18.96759796142578, + "learning_rate": 5e-05, + "loss": 1.1467, + "num_input_tokens_seen": 94788368, + "step": 1431 + }, + { + "epoch": 0.1339448682547854, + "loss": 1.2531790733337402, + "loss_ce": 0.004399813711643219, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 94788368, + "step": 1431 + }, + { + "epoch": 0.13403847053868115, + "grad_norm": 30.16978645324707, + "learning_rate": 5e-05, + "loss": 1.4575, + "num_input_tokens_seen": 94854476, + "step": 1432 + }, + { + "epoch": 0.13403847053868115, + "loss": 1.4885075092315674, + "loss_ce": 0.0036137597635388374, + "loss_iou": 0.609375, + "loss_num": 0.053466796875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 94854476, + "step": 1432 + }, + { + "epoch": 0.13413207282257686, + "grad_norm": 21.352434158325195, + "learning_rate": 5e-05, + "loss": 1.7639, + "num_input_tokens_seen": 94922156, + "step": 1433 + }, + { + "epoch": 0.13413207282257686, + "loss": 1.6939449310302734, + "loss_ce": 0.0025387154892086983, + "loss_iou": 0.734375, + "loss_num": 0.0439453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 94922156, + "step": 1433 + }, + { + "epoch": 0.1342256751064726, + "grad_norm": 14.761849403381348, + "learning_rate": 5e-05, + "loss": 1.323, + "num_input_tokens_seen": 94988076, + "step": 1434 + }, + { + "epoch": 0.1342256751064726, + "loss": 1.2977325916290283, + "loss_ce": 0.00427562091499567, + "loss_iou": 0.53515625, + "loss_num": 0.044677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 94988076, + "step": 1434 + }, + { + "epoch": 0.13431927739036834, + "grad_norm": 28.465805053710938, + "learning_rate": 5e-05, + "loss": 1.5796, + "num_input_tokens_seen": 95054844, + "step": 1435 + }, + { + "epoch": 0.13431927739036834, + "loss": 1.4701448678970337, + "loss_ce": 0.005301137454807758, + "loss_iou": 0.65234375, + "loss_num": 0.0322265625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 95054844, + "step": 1435 + }, + { + "epoch": 0.13441287967426405, + "grad_norm": 38.11674118041992, + "learning_rate": 5e-05, + "loss": 1.6241, + "num_input_tokens_seen": 95120052, + "step": 1436 + }, + { + "epoch": 0.13441287967426405, + "loss": 1.4885752201080322, + "loss_ce": 0.0031321346759796143, + "loss_iou": 0.640625, + "loss_num": 0.0400390625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 95120052, + "step": 1436 + }, + { + "epoch": 0.13450648195815978, + "grad_norm": 21.564964294433594, + "learning_rate": 5e-05, + "loss": 1.6243, + "num_input_tokens_seen": 95186444, + "step": 1437 + }, + { + "epoch": 0.13450648195815978, + "loss": 1.5397932529449463, + "loss_ce": 0.0007306834449991584, + "loss_iou": 0.69140625, + "loss_num": 0.03125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 95186444, + "step": 1437 + }, + { + "epoch": 0.1346000842420555, + "grad_norm": 11.223896980285645, + "learning_rate": 5e-05, + "loss": 1.3417, + "num_input_tokens_seen": 95251976, + "step": 1438 + }, + { + "epoch": 0.1346000842420555, + "loss": 1.4510210752487183, + "loss_ce": 0.002778932685032487, + "loss_iou": 0.625, + "loss_num": 0.0400390625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 95251976, + "step": 1438 + }, + { + "epoch": 0.13469368652595123, + "grad_norm": 14.81171703338623, + "learning_rate": 5e-05, + "loss": 1.2577, + "num_input_tokens_seen": 95316676, + "step": 1439 + }, + { + "epoch": 0.13469368652595123, + "loss": 1.3588645458221436, + "loss_ce": 0.003395825158804655, + "loss_iou": 0.546875, + "loss_num": 0.052734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 95316676, + "step": 1439 + }, + { + "epoch": 0.13478728880984697, + "grad_norm": 14.567553520202637, + "learning_rate": 5e-05, + "loss": 1.3817, + "num_input_tokens_seen": 95383056, + "step": 1440 + }, + { + "epoch": 0.13478728880984697, + "loss": 1.2535104751586914, + "loss_ce": 0.0025339778512716293, + "loss_iou": 0.55859375, + "loss_num": 0.0269775390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 95383056, + "step": 1440 + }, + { + "epoch": 0.13488089109374268, + "grad_norm": 38.295921325683594, + "learning_rate": 5e-05, + "loss": 1.2326, + "num_input_tokens_seen": 95450028, + "step": 1441 + }, + { + "epoch": 0.13488089109374268, + "loss": 1.1347554922103882, + "loss_ce": 0.00682583823800087, + "loss_iou": 0.515625, + "loss_num": 0.019287109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 95450028, + "step": 1441 + }, + { + "epoch": 0.13497449337763842, + "grad_norm": 30.807174682617188, + "learning_rate": 5e-05, + "loss": 1.24, + "num_input_tokens_seen": 95515672, + "step": 1442 + }, + { + "epoch": 0.13497449337763842, + "loss": 1.3047294616699219, + "loss_ce": 0.004924705717712641, + "loss_iou": 0.55859375, + "loss_num": 0.0361328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 95515672, + "step": 1442 + }, + { + "epoch": 0.13506809566153413, + "grad_norm": 18.528146743774414, + "learning_rate": 5e-05, + "loss": 1.7542, + "num_input_tokens_seen": 95581716, + "step": 1443 + }, + { + "epoch": 0.13506809566153413, + "loss": 1.957296371459961, + "loss_ce": 0.003194748191162944, + "loss_iou": 0.80859375, + "loss_num": 0.06787109375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 95581716, + "step": 1443 + }, + { + "epoch": 0.13516169794542987, + "grad_norm": 21.910905838012695, + "learning_rate": 5e-05, + "loss": 1.473, + "num_input_tokens_seen": 95648348, + "step": 1444 + }, + { + "epoch": 0.13516169794542987, + "loss": 1.4957733154296875, + "loss_ce": 0.001632708590477705, + "loss_iou": 0.62890625, + "loss_num": 0.047607421875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 95648348, + "step": 1444 + }, + { + "epoch": 0.1352553002293256, + "grad_norm": 35.485477447509766, + "learning_rate": 5e-05, + "loss": 1.5512, + "num_input_tokens_seen": 95714692, + "step": 1445 + }, + { + "epoch": 0.1352553002293256, + "loss": 1.4851305484771729, + "loss_ce": 0.005882611498236656, + "loss_iou": 0.66015625, + "loss_num": 0.0322265625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 95714692, + "step": 1445 + }, + { + "epoch": 0.13534890251322132, + "grad_norm": 13.884212493896484, + "learning_rate": 5e-05, + "loss": 1.5487, + "num_input_tokens_seen": 95781276, + "step": 1446 + }, + { + "epoch": 0.13534890251322132, + "loss": 1.3953523635864258, + "loss_ce": 0.0033235454466193914, + "loss_iou": 0.59375, + "loss_num": 0.04150390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 95781276, + "step": 1446 + }, + { + "epoch": 0.13544250479711706, + "grad_norm": 12.372857093811035, + "learning_rate": 5e-05, + "loss": 1.1581, + "num_input_tokens_seen": 95845924, + "step": 1447 + }, + { + "epoch": 0.13544250479711706, + "loss": 1.3289029598236084, + "loss_ce": 0.0027310599107295275, + "loss_iou": 0.5390625, + "loss_num": 0.05029296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 95845924, + "step": 1447 + }, + { + "epoch": 0.13553610708101277, + "grad_norm": 25.897235870361328, + "learning_rate": 5e-05, + "loss": 1.1798, + "num_input_tokens_seen": 95912504, + "step": 1448 + }, + { + "epoch": 0.13553610708101277, + "loss": 1.1852606534957886, + "loss_ce": 0.005084910895675421, + "loss_iou": 0.53515625, + "loss_num": 0.021728515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 95912504, + "step": 1448 + }, + { + "epoch": 0.1356297093649085, + "grad_norm": 21.500213623046875, + "learning_rate": 5e-05, + "loss": 1.2767, + "num_input_tokens_seen": 95978508, + "step": 1449 + }, + { + "epoch": 0.1356297093649085, + "loss": 1.2965521812438965, + "loss_ce": 0.00407177209854126, + "loss_iou": 0.5625, + "loss_num": 0.033935546875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 95978508, + "step": 1449 + }, + { + "epoch": 0.13572331164880422, + "grad_norm": 31.819379806518555, + "learning_rate": 5e-05, + "loss": 1.4956, + "num_input_tokens_seen": 96044504, + "step": 1450 + }, + { + "epoch": 0.13572331164880422, + "loss": 1.5511112213134766, + "loss_ce": 0.00228312611579895, + "loss_iou": 0.71875, + "loss_num": 0.022216796875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 96044504, + "step": 1450 + }, + { + "epoch": 0.13581691393269996, + "grad_norm": 15.583928108215332, + "learning_rate": 5e-05, + "loss": 1.6707, + "num_input_tokens_seen": 96109908, + "step": 1451 + }, + { + "epoch": 0.13581691393269996, + "loss": 1.4697784185409546, + "loss_ce": 0.004446416161954403, + "loss_iou": 0.65625, + "loss_num": 0.0303955078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 96109908, + "step": 1451 + }, + { + "epoch": 0.1359105162165957, + "grad_norm": 22.426509857177734, + "learning_rate": 5e-05, + "loss": 1.5282, + "num_input_tokens_seen": 96176388, + "step": 1452 + }, + { + "epoch": 0.1359105162165957, + "loss": 1.519659161567688, + "loss_ce": 0.0035458810161799192, + "loss_iou": 0.671875, + "loss_num": 0.0341796875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 96176388, + "step": 1452 + }, + { + "epoch": 0.1360041185004914, + "grad_norm": 17.847627639770508, + "learning_rate": 5e-05, + "loss": 1.6092, + "num_input_tokens_seen": 96241400, + "step": 1453 + }, + { + "epoch": 0.1360041185004914, + "loss": 1.5556532144546509, + "loss_ce": 0.005848539061844349, + "loss_iou": 0.66015625, + "loss_num": 0.0458984375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 96241400, + "step": 1453 + }, + { + "epoch": 0.13609772078438714, + "grad_norm": 22.900426864624023, + "learning_rate": 5e-05, + "loss": 1.4858, + "num_input_tokens_seen": 96307140, + "step": 1454 + }, + { + "epoch": 0.13609772078438714, + "loss": 1.3733822107315063, + "loss_ce": 0.006927090231329203, + "loss_iou": 0.55078125, + "loss_num": 0.052734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 96307140, + "step": 1454 + }, + { + "epoch": 0.13619132306828285, + "grad_norm": 136.20973205566406, + "learning_rate": 5e-05, + "loss": 1.454, + "num_input_tokens_seen": 96373008, + "step": 1455 + }, + { + "epoch": 0.13619132306828285, + "loss": 1.5045087337493896, + "loss_ce": 0.004997122101485729, + "loss_iou": 0.640625, + "loss_num": 0.04296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 96373008, + "step": 1455 + }, + { + "epoch": 0.1362849253521786, + "grad_norm": 20.077259063720703, + "learning_rate": 5e-05, + "loss": 1.7174, + "num_input_tokens_seen": 96440248, + "step": 1456 + }, + { + "epoch": 0.1362849253521786, + "loss": 1.774254322052002, + "loss_ce": 0.005699529312551022, + "loss_iou": 0.765625, + "loss_num": 0.04833984375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 96440248, + "step": 1456 + }, + { + "epoch": 0.13637852763607433, + "grad_norm": 18.524415969848633, + "learning_rate": 5e-05, + "loss": 1.4061, + "num_input_tokens_seen": 96506472, + "step": 1457 + }, + { + "epoch": 0.13637852763607433, + "loss": 1.5060696601867676, + "loss_ce": 0.005093043204396963, + "loss_iou": 0.63671875, + "loss_num": 0.0458984375, + "loss_xval": 1.5, + "num_input_tokens_seen": 96506472, + "step": 1457 + }, + { + "epoch": 0.13647212991997004, + "grad_norm": 34.78091812133789, + "learning_rate": 5e-05, + "loss": 1.3896, + "num_input_tokens_seen": 96572796, + "step": 1458 + }, + { + "epoch": 0.13647212991997004, + "loss": 1.5728785991668701, + "loss_ce": 0.001589577179402113, + "loss_iou": 0.67578125, + "loss_num": 0.043212890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 96572796, + "step": 1458 + }, + { + "epoch": 0.13656573220386578, + "grad_norm": 22.908023834228516, + "learning_rate": 5e-05, + "loss": 1.4915, + "num_input_tokens_seen": 96638684, + "step": 1459 + }, + { + "epoch": 0.13656573220386578, + "loss": 1.4252605438232422, + "loss_ce": 0.0014324421063065529, + "loss_iou": 0.61328125, + "loss_num": 0.039794921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 96638684, + "step": 1459 + }, + { + "epoch": 0.1366593344877615, + "grad_norm": 22.792509078979492, + "learning_rate": 5e-05, + "loss": 1.7169, + "num_input_tokens_seen": 96704656, + "step": 1460 + }, + { + "epoch": 0.1366593344877615, + "loss": 1.835850477218628, + "loss_ce": 0.006748875603079796, + "loss_iou": 0.8125, + "loss_num": 0.040283203125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 96704656, + "step": 1460 + }, + { + "epoch": 0.13675293677165723, + "grad_norm": 29.556488037109375, + "learning_rate": 5e-05, + "loss": 1.1412, + "num_input_tokens_seen": 96769928, + "step": 1461 + }, + { + "epoch": 0.13675293677165723, + "loss": 0.8836391568183899, + "loss_ce": 0.004549796227365732, + "loss_iou": 0.361328125, + "loss_num": 0.03125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 96769928, + "step": 1461 + }, + { + "epoch": 0.13684653905555297, + "grad_norm": 58.098838806152344, + "learning_rate": 5e-05, + "loss": 1.5686, + "num_input_tokens_seen": 96836292, + "step": 1462 + }, + { + "epoch": 0.13684653905555297, + "loss": 1.7508189678192139, + "loss_ce": 0.0027720038779079914, + "loss_iou": 0.76953125, + "loss_num": 0.042236328125, + "loss_xval": 1.75, + "num_input_tokens_seen": 96836292, + "step": 1462 + }, + { + "epoch": 0.13694014133944868, + "grad_norm": 12.583563804626465, + "learning_rate": 5e-05, + "loss": 1.3308, + "num_input_tokens_seen": 96902740, + "step": 1463 + }, + { + "epoch": 0.13694014133944868, + "loss": 1.4209550619125366, + "loss_ce": 0.005427726544439793, + "loss_iou": 0.6171875, + "loss_num": 0.036376953125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 96902740, + "step": 1463 + }, + { + "epoch": 0.13703374362334442, + "grad_norm": 21.045217514038086, + "learning_rate": 5e-05, + "loss": 1.3016, + "num_input_tokens_seen": 96968620, + "step": 1464 + }, + { + "epoch": 0.13703374362334442, + "loss": 1.2764999866485596, + "loss_ce": 0.0025741523131728172, + "loss_iou": 0.494140625, + "loss_num": 0.057373046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 96968620, + "step": 1464 + }, + { + "epoch": 0.13712734590724013, + "grad_norm": 30.389692306518555, + "learning_rate": 5e-05, + "loss": 1.5985, + "num_input_tokens_seen": 97034388, + "step": 1465 + }, + { + "epoch": 0.13712734590724013, + "loss": 1.6568264961242676, + "loss_ce": 0.006924077868461609, + "loss_iou": 0.69921875, + "loss_num": 0.050048828125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 97034388, + "step": 1465 + }, + { + "epoch": 0.13722094819113587, + "grad_norm": 18.88156509399414, + "learning_rate": 5e-05, + "loss": 1.5185, + "num_input_tokens_seen": 97100156, + "step": 1466 + }, + { + "epoch": 0.13722094819113587, + "loss": 1.4082818031311035, + "loss_ce": 0.003252509981393814, + "loss_iou": 0.59375, + "loss_num": 0.043701171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 97100156, + "step": 1466 + }, + { + "epoch": 0.13731455047503158, + "grad_norm": 19.068809509277344, + "learning_rate": 5e-05, + "loss": 1.5123, + "num_input_tokens_seen": 97166056, + "step": 1467 + }, + { + "epoch": 0.13731455047503158, + "loss": 1.5042084455490112, + "loss_ce": 0.006161610130220652, + "loss_iou": 0.625, + "loss_num": 0.050048828125, + "loss_xval": 1.5, + "num_input_tokens_seen": 97166056, + "step": 1467 + }, + { + "epoch": 0.13740815275892732, + "grad_norm": 34.79692077636719, + "learning_rate": 5e-05, + "loss": 1.4256, + "num_input_tokens_seen": 97232508, + "step": 1468 + }, + { + "epoch": 0.13740815275892732, + "loss": 1.284287691116333, + "loss_ce": 0.0035258689895272255, + "loss_iou": 0.5546875, + "loss_num": 0.034423828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 97232508, + "step": 1468 + }, + { + "epoch": 0.13750175504282305, + "grad_norm": 16.231090545654297, + "learning_rate": 5e-05, + "loss": 1.7221, + "num_input_tokens_seen": 97298300, + "step": 1469 + }, + { + "epoch": 0.13750175504282305, + "loss": 1.5546045303344727, + "loss_ce": 0.005288119427859783, + "loss_iou": 0.6640625, + "loss_num": 0.04345703125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 97298300, + "step": 1469 + }, + { + "epoch": 0.13759535732671876, + "grad_norm": 18.629154205322266, + "learning_rate": 5e-05, + "loss": 1.5172, + "num_input_tokens_seen": 97365736, + "step": 1470 + }, + { + "epoch": 0.13759535732671876, + "loss": 1.3760257959365845, + "loss_ce": 0.0010258422698825598, + "loss_iou": 0.61328125, + "loss_num": 0.030029296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 97365736, + "step": 1470 + }, + { + "epoch": 0.1376889596106145, + "grad_norm": 23.517915725708008, + "learning_rate": 5e-05, + "loss": 1.6245, + "num_input_tokens_seen": 97431052, + "step": 1471 + }, + { + "epoch": 0.1376889596106145, + "loss": 1.8706618547439575, + "loss_ce": 0.0034743547439575195, + "loss_iou": 0.7578125, + "loss_num": 0.07080078125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 97431052, + "step": 1471 + }, + { + "epoch": 0.1377825618945102, + "grad_norm": 20.021461486816406, + "learning_rate": 5e-05, + "loss": 1.4881, + "num_input_tokens_seen": 97497724, + "step": 1472 + }, + { + "epoch": 0.1377825618945102, + "loss": 1.5418694019317627, + "loss_ce": 0.003783364314585924, + "loss_iou": 0.671875, + "loss_num": 0.03857421875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 97497724, + "step": 1472 + }, + { + "epoch": 0.13787616417840595, + "grad_norm": 21.375080108642578, + "learning_rate": 5e-05, + "loss": 1.3725, + "num_input_tokens_seen": 97564116, + "step": 1473 + }, + { + "epoch": 0.13787616417840595, + "loss": 1.447035551071167, + "loss_ce": 0.004652666859328747, + "loss_iou": 0.64453125, + "loss_num": 0.02978515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 97564116, + "step": 1473 + }, + { + "epoch": 0.1379697664623017, + "grad_norm": 43.405216217041016, + "learning_rate": 5e-05, + "loss": 1.4266, + "num_input_tokens_seen": 97629620, + "step": 1474 + }, + { + "epoch": 0.1379697664623017, + "loss": 1.374740481376648, + "loss_ce": 0.0033110652584582567, + "loss_iou": 0.5703125, + "loss_num": 0.046630859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 97629620, + "step": 1474 + }, + { + "epoch": 0.1380633687461974, + "grad_norm": 45.995914459228516, + "learning_rate": 5e-05, + "loss": 1.4239, + "num_input_tokens_seen": 97696068, + "step": 1475 + }, + { + "epoch": 0.1380633687461974, + "loss": 1.4466938972473145, + "loss_ce": 0.0062641566619277, + "loss_iou": 0.640625, + "loss_num": 0.0311279296875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 97696068, + "step": 1475 + }, + { + "epoch": 0.13815697103009314, + "grad_norm": 23.018688201904297, + "learning_rate": 5e-05, + "loss": 1.6538, + "num_input_tokens_seen": 97761804, + "step": 1476 + }, + { + "epoch": 0.13815697103009314, + "loss": 1.5999515056610107, + "loss_ce": 0.0032719094306230545, + "loss_iou": 0.69140625, + "loss_num": 0.043212890625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 97761804, + "step": 1476 + }, + { + "epoch": 0.13825057331398885, + "grad_norm": 16.344158172607422, + "learning_rate": 5e-05, + "loss": 1.3571, + "num_input_tokens_seen": 97827784, + "step": 1477 + }, + { + "epoch": 0.13825057331398885, + "loss": 1.2144036293029785, + "loss_ce": 0.006884063594043255, + "loss_iou": 0.4921875, + "loss_num": 0.044677734375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 97827784, + "step": 1477 + }, + { + "epoch": 0.1383441755978846, + "grad_norm": 70.98696899414062, + "learning_rate": 5e-05, + "loss": 1.7217, + "num_input_tokens_seen": 97894908, + "step": 1478 + }, + { + "epoch": 0.1383441755978846, + "loss": 1.8056621551513672, + "loss_ce": 0.011716828681528568, + "loss_iou": 0.7890625, + "loss_num": 0.04345703125, + "loss_xval": 1.796875, + "num_input_tokens_seen": 97894908, + "step": 1478 + }, + { + "epoch": 0.13843777788178033, + "grad_norm": 31.899520874023438, + "learning_rate": 5e-05, + "loss": 1.8972, + "num_input_tokens_seen": 97961448, + "step": 1479 + }, + { + "epoch": 0.13843777788178033, + "loss": 1.9800881147384644, + "loss_ce": 0.0025489763356745243, + "loss_iou": 0.8125, + "loss_num": 0.0712890625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 97961448, + "step": 1479 + }, + { + "epoch": 0.13853138016567604, + "grad_norm": 33.952205657958984, + "learning_rate": 5e-05, + "loss": 1.8364, + "num_input_tokens_seen": 98027512, + "step": 1480 + }, + { + "epoch": 0.13853138016567604, + "loss": 1.9767546653747559, + "loss_ce": 0.005075046792626381, + "loss_iou": 0.859375, + "loss_num": 0.05078125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 98027512, + "step": 1480 + }, + { + "epoch": 0.13862498244957178, + "grad_norm": 10.682676315307617, + "learning_rate": 5e-05, + "loss": 1.2754, + "num_input_tokens_seen": 98094120, + "step": 1481 + }, + { + "epoch": 0.13862498244957178, + "loss": 1.244215965270996, + "loss_ce": 0.0020283497869968414, + "loss_iou": 0.55859375, + "loss_num": 0.0255126953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 98094120, + "step": 1481 + }, + { + "epoch": 0.1387185847334675, + "grad_norm": 13.159777641296387, + "learning_rate": 5e-05, + "loss": 1.4164, + "num_input_tokens_seen": 98161292, + "step": 1482 + }, + { + "epoch": 0.1387185847334675, + "loss": 1.35675847530365, + "loss_ce": 0.005196006502956152, + "loss_iou": 0.578125, + "loss_num": 0.0390625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 98161292, + "step": 1482 + }, + { + "epoch": 0.13881218701736323, + "grad_norm": 31.39657974243164, + "learning_rate": 5e-05, + "loss": 1.3705, + "num_input_tokens_seen": 98227904, + "step": 1483 + }, + { + "epoch": 0.13881218701736323, + "loss": 1.2967456579208374, + "loss_ce": 0.004631428048014641, + "loss_iou": 0.56640625, + "loss_num": 0.03271484375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 98227904, + "step": 1483 + }, + { + "epoch": 0.13890578930125896, + "grad_norm": 18.579883575439453, + "learning_rate": 5e-05, + "loss": 1.628, + "num_input_tokens_seen": 98294892, + "step": 1484 + }, + { + "epoch": 0.13890578930125896, + "loss": 1.8487024307250977, + "loss_ce": 0.003975847736001015, + "loss_iou": 0.78125, + "loss_num": 0.056640625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 98294892, + "step": 1484 + }, + { + "epoch": 0.13899939158515467, + "grad_norm": 11.909125328063965, + "learning_rate": 5e-05, + "loss": 1.3687, + "num_input_tokens_seen": 98360852, + "step": 1485 + }, + { + "epoch": 0.13899939158515467, + "loss": 1.5161800384521484, + "loss_ce": 0.006902600638568401, + "loss_iou": 0.6171875, + "loss_num": 0.0556640625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 98360852, + "step": 1485 + }, + { + "epoch": 0.1390929938690504, + "grad_norm": 22.15267562866211, + "learning_rate": 5e-05, + "loss": 1.3996, + "num_input_tokens_seen": 98427528, + "step": 1486 + }, + { + "epoch": 0.1390929938690504, + "loss": 1.5718417167663574, + "loss_ce": 0.0015292345779016614, + "loss_iou": 0.703125, + "loss_num": 0.0322265625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 98427528, + "step": 1486 + }, + { + "epoch": 0.13918659615294612, + "grad_norm": 17.999814987182617, + "learning_rate": 5e-05, + "loss": 1.5932, + "num_input_tokens_seen": 98493120, + "step": 1487 + }, + { + "epoch": 0.13918659615294612, + "loss": 1.5637671947479248, + "loss_ce": 0.005905886180698872, + "loss_iou": 0.6953125, + "loss_num": 0.032958984375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 98493120, + "step": 1487 + }, + { + "epoch": 0.13928019843684186, + "grad_norm": 14.201549530029297, + "learning_rate": 5e-05, + "loss": 1.1767, + "num_input_tokens_seen": 98559444, + "step": 1488 + }, + { + "epoch": 0.13928019843684186, + "loss": 1.1103944778442383, + "loss_ce": 0.001019414747133851, + "loss_iou": 0.498046875, + "loss_num": 0.0223388671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 98559444, + "step": 1488 + }, + { + "epoch": 0.13937380072073757, + "grad_norm": 14.96893310546875, + "learning_rate": 5e-05, + "loss": 1.4167, + "num_input_tokens_seen": 98625640, + "step": 1489 + }, + { + "epoch": 0.13937380072073757, + "loss": 1.501664161682129, + "loss_ce": 0.009720880538225174, + "loss_iou": 0.57421875, + "loss_num": 0.068359375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 98625640, + "step": 1489 + }, + { + "epoch": 0.1394674030046333, + "grad_norm": 151.1747283935547, + "learning_rate": 5e-05, + "loss": 1.0989, + "num_input_tokens_seen": 98691280, + "step": 1490 + }, + { + "epoch": 0.1394674030046333, + "loss": 1.0764378309249878, + "loss_ce": 0.008139526471495628, + "loss_iou": 0.46484375, + "loss_num": 0.0281982421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 98691280, + "step": 1490 + }, + { + "epoch": 0.13956100528852905, + "grad_norm": 36.526615142822266, + "learning_rate": 5e-05, + "loss": 1.6628, + "num_input_tokens_seen": 98756456, + "step": 1491 + }, + { + "epoch": 0.13956100528852905, + "loss": 1.5029892921447754, + "loss_ce": 0.003965775948017836, + "loss_iou": 0.6328125, + "loss_num": 0.047119140625, + "loss_xval": 1.5, + "num_input_tokens_seen": 98756456, + "step": 1491 + }, + { + "epoch": 0.13965460757242476, + "grad_norm": 18.75931739807129, + "learning_rate": 5e-05, + "loss": 1.5891, + "num_input_tokens_seen": 98822744, + "step": 1492 + }, + { + "epoch": 0.13965460757242476, + "loss": 1.6656162738800049, + "loss_ce": 0.006436473689973354, + "loss_iou": 0.7265625, + "loss_num": 0.04150390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 98822744, + "step": 1492 + }, + { + "epoch": 0.1397482098563205, + "grad_norm": 11.803533554077148, + "learning_rate": 5e-05, + "loss": 1.1741, + "num_input_tokens_seen": 98888000, + "step": 1493 + }, + { + "epoch": 0.1397482098563205, + "loss": 1.4049453735351562, + "loss_ce": 0.005531243979930878, + "loss_iou": 0.57421875, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 98888000, + "step": 1493 + }, + { + "epoch": 0.1398418121402162, + "grad_norm": 24.705406188964844, + "learning_rate": 5e-05, + "loss": 1.5608, + "num_input_tokens_seen": 98953916, + "step": 1494 + }, + { + "epoch": 0.1398418121402162, + "loss": 1.478304147720337, + "loss_ce": 0.0036947601474821568, + "loss_iou": 0.59375, + "loss_num": 0.05712890625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 98953916, + "step": 1494 + }, + { + "epoch": 0.13993541442411195, + "grad_norm": 24.102781295776367, + "learning_rate": 5e-05, + "loss": 1.503, + "num_input_tokens_seen": 99019580, + "step": 1495 + }, + { + "epoch": 0.13993541442411195, + "loss": 1.5414433479309082, + "loss_ce": 0.006775477435439825, + "loss_iou": 0.67578125, + "loss_num": 0.035888671875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 99019580, + "step": 1495 + }, + { + "epoch": 0.1400290167080077, + "grad_norm": 43.382747650146484, + "learning_rate": 5e-05, + "loss": 1.8064, + "num_input_tokens_seen": 99084964, + "step": 1496 + }, + { + "epoch": 0.1400290167080077, + "loss": 2.066258192062378, + "loss_ce": 0.004734785296022892, + "loss_iou": 0.83984375, + "loss_num": 0.076171875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 99084964, + "step": 1496 + }, + { + "epoch": 0.1401226189919034, + "grad_norm": 26.74125099182129, + "learning_rate": 5e-05, + "loss": 1.5637, + "num_input_tokens_seen": 99150640, + "step": 1497 + }, + { + "epoch": 0.1401226189919034, + "loss": 1.5906933546066284, + "loss_ce": 0.012568360194563866, + "loss_iou": 0.62890625, + "loss_num": 0.0634765625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 99150640, + "step": 1497 + }, + { + "epoch": 0.14021622127579914, + "grad_norm": 22.88028907775879, + "learning_rate": 5e-05, + "loss": 1.8411, + "num_input_tokens_seen": 99217832, + "step": 1498 + }, + { + "epoch": 0.14021622127579914, + "loss": 1.9064733982086182, + "loss_ce": 0.004129617474973202, + "loss_iou": 0.7890625, + "loss_num": 0.06494140625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 99217832, + "step": 1498 + }, + { + "epoch": 0.14030982355969485, + "grad_norm": 28.038320541381836, + "learning_rate": 5e-05, + "loss": 1.5641, + "num_input_tokens_seen": 99283692, + "step": 1499 + }, + { + "epoch": 0.14030982355969485, + "loss": 1.6595817804336548, + "loss_ce": 0.010655960068106651, + "loss_iou": 0.65234375, + "loss_num": 0.0693359375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 99283692, + "step": 1499 + }, + { + "epoch": 0.14040342584359058, + "grad_norm": 22.882043838500977, + "learning_rate": 5e-05, + "loss": 1.6246, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "eval_seeclick_CIoU": 0.12986931204795837, + "eval_seeclick_GIoU": 0.1154865175485611, + "eval_seeclick_IoU": 0.2536672055721283, + "eval_seeclick_MAE_all": 0.14381030946969986, + "eval_seeclick_MAE_h": 0.10508225485682487, + "eval_seeclick_MAE_w": 0.11415602266788483, + "eval_seeclick_MAE_x_boxes": 0.20156145840883255, + "eval_seeclick_MAE_y_boxes": 0.12079284340143204, + "eval_seeclick_NUM_probability": 0.9990553855895996, + "eval_seeclick_inside_bbox": 0.4208333343267441, + "eval_seeclick_loss": 2.517971992492676, + "eval_seeclick_loss_ce": 0.014489146415144205, + "eval_seeclick_loss_iou": 0.8828125, + "eval_seeclick_loss_num": 0.14289093017578125, + "eval_seeclick_loss_xval": 2.48046875, + "eval_seeclick_runtime": 61.9424, + "eval_seeclick_samples_per_second": 0.759, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "eval_icons_CIoU": -0.06381661631166935, + "eval_icons_GIoU": -0.012035993859171867, + "eval_icons_IoU": 0.12227378040552139, + "eval_icons_MAE_all": 0.1508197821676731, + "eval_icons_MAE_h": 0.12457311898469925, + "eval_icons_MAE_w": 0.15456288307905197, + "eval_icons_MAE_x_boxes": 0.16619648039340973, + "eval_icons_MAE_y_boxes": 0.06023375503718853, + "eval_icons_NUM_probability": 0.9997735619544983, + "eval_icons_inside_bbox": 0.2517361119389534, + "eval_icons_loss": 2.7961881160736084, + "eval_icons_loss_ce": 0.0007625973084941506, + "eval_icons_loss_iou": 0.99365234375, + "eval_icons_loss_num": 0.17418670654296875, + "eval_icons_loss_xval": 2.857421875, + "eval_icons_runtime": 73.5619, + "eval_icons_samples_per_second": 0.68, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "eval_screenspot_CIoU": 0.005095538372794787, + "eval_screenspot_GIoU": 0.029694482684135437, + "eval_screenspot_IoU": 0.17252641916275024, + "eval_screenspot_MAE_all": 0.18482921024163565, + "eval_screenspot_MAE_h": 0.1193593442440033, + "eval_screenspot_MAE_w": 0.1454322561621666, + "eval_screenspot_MAE_x_boxes": 0.28102100888888043, + "eval_screenspot_MAE_y_boxes": 0.11931897948185603, + "eval_screenspot_NUM_probability": 0.9993451635042826, + "eval_screenspot_inside_bbox": 0.43833333253860474, + "eval_screenspot_loss": 2.9187819957733154, + "eval_screenspot_loss_ce": 0.026681889469424885, + "eval_screenspot_loss_iou": 0.9869791666666666, + "eval_screenspot_loss_num": 0.18549601236979166, + "eval_screenspot_loss_xval": 2.9010416666666665, + "eval_screenspot_runtime": 113.6674, + "eval_screenspot_samples_per_second": 0.783, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "eval_compot_CIoU": -0.07622082345187664, + "eval_compot_GIoU": -0.04671732848510146, + "eval_compot_IoU": 0.10855663195252419, + "eval_compot_MAE_all": 0.1775462105870247, + "eval_compot_MAE_h": 0.12493967264890671, + "eval_compot_MAE_w": 0.11044013127684593, + "eval_compot_MAE_x_boxes": 0.25486552715301514, + "eval_compot_MAE_y_boxes": 0.10714587941765785, + "eval_compot_NUM_probability": 0.9995182752609253, + "eval_compot_inside_bbox": 0.2048611119389534, + "eval_compot_loss": 3.0232787132263184, + "eval_compot_loss_ce": 0.008488157531246543, + "eval_compot_loss_iou": 1.064208984375, + "eval_compot_loss_num": 0.194061279296875, + "eval_compot_loss_xval": 3.0986328125, + "eval_compot_runtime": 64.4201, + "eval_compot_samples_per_second": 0.776, + "eval_compot_steps_per_second": 0.031, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "eval_custom_ui_MAE_all": 0.12771786376833916, + "eval_custom_ui_MAE_x": 0.11793666705489159, + "eval_custom_ui_MAE_y": 0.13749905303120613, + "eval_custom_ui_NUM_probability": 0.9999288022518158, + "eval_custom_ui_loss": 0.7349903583526611, + "eval_custom_ui_loss_ce": 0.1220637708902359, + "eval_custom_ui_loss_num": 0.1292266845703125, + "eval_custom_ui_loss_xval": 0.646240234375, + "eval_custom_ui_runtime": 56.8825, + "eval_custom_ui_samples_per_second": 0.879, + "eval_custom_ui_steps_per_second": 0.035, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14040342584359058, + "loss": 0.7274760007858276, + "loss_ce": 0.12981978058815002, + "loss_iou": 0.0, + "loss_num": 0.11962890625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 99349752, + "step": 1500 + }, + { + "epoch": 0.14049702812748632, + "grad_norm": 24.231904983520508, + "learning_rate": 5e-05, + "loss": 1.2735, + "num_input_tokens_seen": 99416176, + "step": 1501 + }, + { + "epoch": 0.14049702812748632, + "loss": 1.2148327827453613, + "loss_ce": 0.008076216094195843, + "loss_iou": 0.5234375, + "loss_num": 0.031494140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 99416176, + "step": 1501 + }, + { + "epoch": 0.14059063041138203, + "grad_norm": 55.206363677978516, + "learning_rate": 5e-05, + "loss": 1.3824, + "num_input_tokens_seen": 99482524, + "step": 1502 + }, + { + "epoch": 0.14059063041138203, + "loss": 1.4969196319580078, + "loss_ce": 0.006685256026685238, + "loss_iou": 0.62109375, + "loss_num": 0.048828125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 99482524, + "step": 1502 + }, + { + "epoch": 0.14068423269527777, + "grad_norm": 31.431215286254883, + "learning_rate": 5e-05, + "loss": 1.6985, + "num_input_tokens_seen": 99549584, + "step": 1503 + }, + { + "epoch": 0.14068423269527777, + "loss": 1.6617836952209473, + "loss_ce": 0.005533731542527676, + "loss_iou": 0.73828125, + "loss_num": 0.0361328125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 99549584, + "step": 1503 + }, + { + "epoch": 0.14077783497917348, + "grad_norm": 22.039602279663086, + "learning_rate": 5e-05, + "loss": 1.8213, + "num_input_tokens_seen": 99615260, + "step": 1504 + }, + { + "epoch": 0.14077783497917348, + "loss": 1.7598512172698975, + "loss_ce": 0.0059449695982038975, + "loss_iou": 0.75, + "loss_num": 0.051513671875, + "loss_xval": 1.75, + "num_input_tokens_seen": 99615260, + "step": 1504 + }, + { + "epoch": 0.14087143726306922, + "grad_norm": 20.392858505249023, + "learning_rate": 5e-05, + "loss": 1.4993, + "num_input_tokens_seen": 99681472, + "step": 1505 + }, + { + "epoch": 0.14087143726306922, + "loss": 1.5586025714874268, + "loss_ce": 0.004891672637313604, + "loss_iou": 0.66796875, + "loss_num": 0.042724609375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 99681472, + "step": 1505 + }, + { + "epoch": 0.14096503954696496, + "grad_norm": 28.995214462280273, + "learning_rate": 5e-05, + "loss": 1.4371, + "num_input_tokens_seen": 99747096, + "step": 1506 + }, + { + "epoch": 0.14096503954696496, + "loss": 1.4462305307388306, + "loss_ce": 0.0014062949921935797, + "loss_iou": 0.6328125, + "loss_num": 0.035888671875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 99747096, + "step": 1506 + }, + { + "epoch": 0.14105864183086067, + "grad_norm": 21.191801071166992, + "learning_rate": 5e-05, + "loss": 1.5062, + "num_input_tokens_seen": 99813116, + "step": 1507 + }, + { + "epoch": 0.14105864183086067, + "loss": 1.6104576587677002, + "loss_ce": 0.006453688256442547, + "loss_iou": 0.671875, + "loss_num": 0.052734375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 99813116, + "step": 1507 + }, + { + "epoch": 0.1411522441147564, + "grad_norm": 24.812150955200195, + "learning_rate": 5e-05, + "loss": 1.2635, + "num_input_tokens_seen": 99880304, + "step": 1508 + }, + { + "epoch": 0.1411522441147564, + "loss": 1.4335097074508667, + "loss_ce": 0.005287112668156624, + "loss_iou": 0.59375, + "loss_num": 0.0478515625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 99880304, + "step": 1508 + }, + { + "epoch": 0.14124584639865212, + "grad_norm": 29.584083557128906, + "learning_rate": 5e-05, + "loss": 1.8114, + "num_input_tokens_seen": 99946060, + "step": 1509 + }, + { + "epoch": 0.14124584639865212, + "loss": 1.7381325960159302, + "loss_ce": 0.00864044763147831, + "loss_iou": 0.71875, + "loss_num": 0.058349609375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 99946060, + "step": 1509 + }, + { + "epoch": 0.14133944868254786, + "grad_norm": 17.324613571166992, + "learning_rate": 5e-05, + "loss": 1.3028, + "num_input_tokens_seen": 100011568, + "step": 1510 + }, + { + "epoch": 0.14133944868254786, + "loss": 1.3398258686065674, + "loss_ce": 0.008771165274083614, + "loss_iou": 0.54296875, + "loss_num": 0.049560546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 100011568, + "step": 1510 + }, + { + "epoch": 0.14143305096644357, + "grad_norm": 23.694305419921875, + "learning_rate": 5e-05, + "loss": 1.481, + "num_input_tokens_seen": 100078336, + "step": 1511 + }, + { + "epoch": 0.14143305096644357, + "loss": 1.3688184022903442, + "loss_ce": 0.004560520872473717, + "loss_iou": 0.60546875, + "loss_num": 0.030029296875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 100078336, + "step": 1511 + }, + { + "epoch": 0.1415266532503393, + "grad_norm": 38.424346923828125, + "learning_rate": 5e-05, + "loss": 1.6412, + "num_input_tokens_seen": 100145548, + "step": 1512 + }, + { + "epoch": 0.1415266532503393, + "loss": 1.7066707611083984, + "loss_ce": 0.004522241652011871, + "loss_iou": 0.73046875, + "loss_num": 0.048583984375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 100145548, + "step": 1512 + }, + { + "epoch": 0.14162025553423505, + "grad_norm": 21.91921043395996, + "learning_rate": 5e-05, + "loss": 1.7673, + "num_input_tokens_seen": 100210616, + "step": 1513 + }, + { + "epoch": 0.14162025553423505, + "loss": 1.8947184085845947, + "loss_ce": 0.007999643683433533, + "loss_iou": 0.8125, + "loss_num": 0.05322265625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 100210616, + "step": 1513 + }, + { + "epoch": 0.14171385781813076, + "grad_norm": 38.25762176513672, + "learning_rate": 5e-05, + "loss": 1.4429, + "num_input_tokens_seen": 100275608, + "step": 1514 + }, + { + "epoch": 0.14171385781813076, + "loss": 1.380049467086792, + "loss_ce": 0.002119877375662327, + "loss_iou": 0.515625, + "loss_num": 0.06884765625, + "loss_xval": 1.375, + "num_input_tokens_seen": 100275608, + "step": 1514 + }, + { + "epoch": 0.1418074601020265, + "grad_norm": 10.709073066711426, + "learning_rate": 5e-05, + "loss": 1.4471, + "num_input_tokens_seen": 100341516, + "step": 1515 + }, + { + "epoch": 0.1418074601020265, + "loss": 1.5805141925811768, + "loss_ce": 0.0062953149899840355, + "loss_iou": 0.671875, + "loss_num": 0.046630859375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 100341516, + "step": 1515 + }, + { + "epoch": 0.1419010623859222, + "grad_norm": 20.05232048034668, + "learning_rate": 5e-05, + "loss": 1.4526, + "num_input_tokens_seen": 100408124, + "step": 1516 + }, + { + "epoch": 0.1419010623859222, + "loss": 1.5399630069732666, + "loss_ce": 0.004318465478718281, + "loss_iou": 0.62109375, + "loss_num": 0.058837890625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 100408124, + "step": 1516 + }, + { + "epoch": 0.14199466466981794, + "grad_norm": 32.801998138427734, + "learning_rate": 5e-05, + "loss": 1.4752, + "num_input_tokens_seen": 100474908, + "step": 1517 + }, + { + "epoch": 0.14199466466981794, + "loss": 1.391688585281372, + "loss_ce": 0.0025284471921622753, + "loss_iou": 0.6171875, + "loss_num": 0.03076171875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 100474908, + "step": 1517 + }, + { + "epoch": 0.14208826695371368, + "grad_norm": 23.03963279724121, + "learning_rate": 5e-05, + "loss": 1.4937, + "num_input_tokens_seen": 100541184, + "step": 1518 + }, + { + "epoch": 0.14208826695371368, + "loss": 1.4620805978775024, + "loss_ce": 0.009932130575180054, + "loss_iou": 0.64453125, + "loss_num": 0.032958984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 100541184, + "step": 1518 + }, + { + "epoch": 0.1421818692376094, + "grad_norm": 26.238849639892578, + "learning_rate": 5e-05, + "loss": 1.5887, + "num_input_tokens_seen": 100607804, + "step": 1519 + }, + { + "epoch": 0.1421818692376094, + "loss": 1.434612512588501, + "loss_ce": 0.0057337647303938866, + "loss_iou": 0.61328125, + "loss_num": 0.039794921875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 100607804, + "step": 1519 + }, + { + "epoch": 0.14227547152150513, + "grad_norm": 17.839048385620117, + "learning_rate": 5e-05, + "loss": 1.2647, + "num_input_tokens_seen": 100673700, + "step": 1520 + }, + { + "epoch": 0.14227547152150513, + "loss": 1.2322412729263306, + "loss_ce": 0.006166993640363216, + "loss_iou": 0.5078125, + "loss_num": 0.042236328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 100673700, + "step": 1520 + }, + { + "epoch": 0.14236907380540084, + "grad_norm": 18.898719787597656, + "learning_rate": 5e-05, + "loss": 1.3108, + "num_input_tokens_seen": 100741140, + "step": 1521 + }, + { + "epoch": 0.14236907380540084, + "loss": 1.2941036224365234, + "loss_ce": 0.002599644009023905, + "loss_iou": 0.58984375, + "loss_num": 0.022705078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 100741140, + "step": 1521 + }, + { + "epoch": 0.14246267608929658, + "grad_norm": 27.399078369140625, + "learning_rate": 5e-05, + "loss": 1.6027, + "num_input_tokens_seen": 100807032, + "step": 1522 + }, + { + "epoch": 0.14246267608929658, + "loss": 1.7253706455230713, + "loss_ce": 0.007597208954393864, + "loss_iou": 0.69140625, + "loss_num": 0.0673828125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 100807032, + "step": 1522 + }, + { + "epoch": 0.14255627837319232, + "grad_norm": 40.45325469970703, + "learning_rate": 5e-05, + "loss": 1.4111, + "num_input_tokens_seen": 100874484, + "step": 1523 + }, + { + "epoch": 0.14255627837319232, + "loss": 1.465791940689087, + "loss_ce": 0.006319269072264433, + "loss_iou": 0.6640625, + "loss_num": 0.02587890625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 100874484, + "step": 1523 + }, + { + "epoch": 0.14264988065708803, + "grad_norm": 23.753206253051758, + "learning_rate": 5e-05, + "loss": 1.6924, + "num_input_tokens_seen": 100940064, + "step": 1524 + }, + { + "epoch": 0.14264988065708803, + "loss": 1.8300961256027222, + "loss_ce": 0.0034359393175691366, + "loss_iou": 0.75, + "loss_num": 0.0654296875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 100940064, + "step": 1524 + }, + { + "epoch": 0.14274348294098377, + "grad_norm": 12.655919075012207, + "learning_rate": 5e-05, + "loss": 1.168, + "num_input_tokens_seen": 101006424, + "step": 1525 + }, + { + "epoch": 0.14274348294098377, + "loss": 1.132278323173523, + "loss_ce": 0.0023954908829182386, + "loss_iou": 0.48828125, + "loss_num": 0.03076171875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 101006424, + "step": 1525 + }, + { + "epoch": 0.14283708522487948, + "grad_norm": 23.536968231201172, + "learning_rate": 5e-05, + "loss": 1.4492, + "num_input_tokens_seen": 101071936, + "step": 1526 + }, + { + "epoch": 0.14283708522487948, + "loss": 1.3138073682785034, + "loss_ce": 0.005701958201825619, + "loss_iou": 0.57421875, + "loss_num": 0.0322265625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 101071936, + "step": 1526 + }, + { + "epoch": 0.14293068750877522, + "grad_norm": 20.349735260009766, + "learning_rate": 5e-05, + "loss": 1.4725, + "num_input_tokens_seen": 101137240, + "step": 1527 + }, + { + "epoch": 0.14293068750877522, + "loss": 1.4538692235946655, + "loss_ce": 0.004162219353020191, + "loss_iou": 0.65234375, + "loss_num": 0.0283203125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 101137240, + "step": 1527 + }, + { + "epoch": 0.14302428979267093, + "grad_norm": 34.49251174926758, + "learning_rate": 5e-05, + "loss": 1.6424, + "num_input_tokens_seen": 101202700, + "step": 1528 + }, + { + "epoch": 0.14302428979267093, + "loss": 1.6864709854125977, + "loss_ce": 0.008736604824662209, + "loss_iou": 0.6953125, + "loss_num": 0.05712890625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 101202700, + "step": 1528 + }, + { + "epoch": 0.14311789207656667, + "grad_norm": 24.006309509277344, + "learning_rate": 5e-05, + "loss": 1.6988, + "num_input_tokens_seen": 101269828, + "step": 1529 + }, + { + "epoch": 0.14311789207656667, + "loss": 1.5257787704467773, + "loss_ce": 0.0033177491277456284, + "loss_iou": 0.68359375, + "loss_num": 0.0303955078125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 101269828, + "step": 1529 + }, + { + "epoch": 0.1432114943604624, + "grad_norm": 50.223751068115234, + "learning_rate": 5e-05, + "loss": 1.524, + "num_input_tokens_seen": 101335964, + "step": 1530 + }, + { + "epoch": 0.1432114943604624, + "loss": 1.5145683288574219, + "loss_ce": 0.0038261013105511665, + "loss_iou": 0.64453125, + "loss_num": 0.0439453125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 101335964, + "step": 1530 + }, + { + "epoch": 0.14330509664435812, + "grad_norm": 16.774459838867188, + "learning_rate": 5e-05, + "loss": 1.6343, + "num_input_tokens_seen": 101401128, + "step": 1531 + }, + { + "epoch": 0.14330509664435812, + "loss": 1.313849687576294, + "loss_ce": 0.0026925490237772465, + "loss_iou": 0.6015625, + "loss_num": 0.02197265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 101401128, + "step": 1531 + }, + { + "epoch": 0.14339869892825385, + "grad_norm": 18.925683975219727, + "learning_rate": 5e-05, + "loss": 1.3889, + "num_input_tokens_seen": 101468268, + "step": 1532 + }, + { + "epoch": 0.14339869892825385, + "loss": 1.3102495670318604, + "loss_ce": 0.0070268744602799416, + "loss_iou": 0.58203125, + "loss_num": 0.0279541015625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 101468268, + "step": 1532 + }, + { + "epoch": 0.14349230121214956, + "grad_norm": 18.31987762451172, + "learning_rate": 5e-05, + "loss": 1.56, + "num_input_tokens_seen": 101533604, + "step": 1533 + }, + { + "epoch": 0.14349230121214956, + "loss": 1.6439417600631714, + "loss_ce": 0.007223045919090509, + "loss_iou": 0.67578125, + "loss_num": 0.05712890625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 101533604, + "step": 1533 + }, + { + "epoch": 0.1435859034960453, + "grad_norm": 40.049530029296875, + "learning_rate": 5e-05, + "loss": 1.4117, + "num_input_tokens_seen": 101598752, + "step": 1534 + }, + { + "epoch": 0.1435859034960453, + "loss": 1.3404393196105957, + "loss_ce": 0.009323660284280777, + "loss_iou": 0.53125, + "loss_num": 0.05322265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 101598752, + "step": 1534 + }, + { + "epoch": 0.14367950577994104, + "grad_norm": 20.997421264648438, + "learning_rate": 5e-05, + "loss": 1.2269, + "num_input_tokens_seen": 101663404, + "step": 1535 + }, + { + "epoch": 0.14367950577994104, + "loss": 0.9025107622146606, + "loss_ce": 0.003432357916608453, + "loss_iou": 0.3828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 101663404, + "step": 1535 + }, + { + "epoch": 0.14377310806383675, + "grad_norm": 24.92330551147461, + "learning_rate": 5e-05, + "loss": 1.3334, + "num_input_tokens_seen": 101729156, + "step": 1536 + }, + { + "epoch": 0.14377310806383675, + "loss": 1.4888564348220825, + "loss_ce": 0.005458023399114609, + "loss_iou": 0.63671875, + "loss_num": 0.041748046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 101729156, + "step": 1536 + }, + { + "epoch": 0.1438667103477325, + "grad_norm": 38.17001724243164, + "learning_rate": 5e-05, + "loss": 1.4925, + "num_input_tokens_seen": 101795184, + "step": 1537 + }, + { + "epoch": 0.1438667103477325, + "loss": 1.4716002941131592, + "loss_ce": 0.0033385634887963533, + "loss_iou": 0.62109375, + "loss_num": 0.04443359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 101795184, + "step": 1537 + }, + { + "epoch": 0.1439603126316282, + "grad_norm": 52.16805648803711, + "learning_rate": 5e-05, + "loss": 1.5786, + "num_input_tokens_seen": 101860244, + "step": 1538 + }, + { + "epoch": 0.1439603126316282, + "loss": 1.6646236181259155, + "loss_ce": 0.0036128046922385693, + "loss_iou": 0.65234375, + "loss_num": 0.0712890625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 101860244, + "step": 1538 + }, + { + "epoch": 0.14405391491552394, + "grad_norm": 18.758058547973633, + "learning_rate": 5e-05, + "loss": 1.7187, + "num_input_tokens_seen": 101926872, + "step": 1539 + }, + { + "epoch": 0.14405391491552394, + "loss": 1.78778076171875, + "loss_ce": 0.004577621817588806, + "loss_iou": 0.73828125, + "loss_num": 0.06103515625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 101926872, + "step": 1539 + }, + { + "epoch": 0.14414751719941968, + "grad_norm": 33.0322151184082, + "learning_rate": 5e-05, + "loss": 1.4377, + "num_input_tokens_seen": 101992900, + "step": 1540 + }, + { + "epoch": 0.14414751719941968, + "loss": 1.5978648662567139, + "loss_ce": 0.003138265572488308, + "loss_iou": 0.6484375, + "loss_num": 0.06005859375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 101992900, + "step": 1540 + }, + { + "epoch": 0.1442411194833154, + "grad_norm": 26.099706649780273, + "learning_rate": 5e-05, + "loss": 1.4051, + "num_input_tokens_seen": 102058808, + "step": 1541 + }, + { + "epoch": 0.1442411194833154, + "loss": 1.4880197048187256, + "loss_ce": 0.0031562973745167255, + "loss_iou": 0.63671875, + "loss_num": 0.0419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 102058808, + "step": 1541 + }, + { + "epoch": 0.14433472176721113, + "grad_norm": 22.058637619018555, + "learning_rate": 5e-05, + "loss": 1.5436, + "num_input_tokens_seen": 102125724, + "step": 1542 + }, + { + "epoch": 0.14433472176721113, + "loss": 1.756797194480896, + "loss_ce": 0.003867490915581584, + "loss_iou": 0.76171875, + "loss_num": 0.045654296875, + "loss_xval": 1.75, + "num_input_tokens_seen": 102125724, + "step": 1542 + }, + { + "epoch": 0.14442832405110684, + "grad_norm": 19.189023971557617, + "learning_rate": 5e-05, + "loss": 1.3314, + "num_input_tokens_seen": 102191628, + "step": 1543 + }, + { + "epoch": 0.14442832405110684, + "loss": 1.3758902549743652, + "loss_ce": 0.0018668891862034798, + "loss_iou": 0.58984375, + "loss_num": 0.03955078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 102191628, + "step": 1543 + }, + { + "epoch": 0.14452192633500258, + "grad_norm": 18.584917068481445, + "learning_rate": 5e-05, + "loss": 1.6219, + "num_input_tokens_seen": 102256720, + "step": 1544 + }, + { + "epoch": 0.14452192633500258, + "loss": 1.7577810287475586, + "loss_ce": 0.0019217421067878604, + "loss_iou": 0.7421875, + "loss_num": 0.0537109375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 102256720, + "step": 1544 + }, + { + "epoch": 0.14461552861889831, + "grad_norm": 14.617402076721191, + "learning_rate": 5e-05, + "loss": 1.5229, + "num_input_tokens_seen": 102322440, + "step": 1545 + }, + { + "epoch": 0.14461552861889831, + "loss": 1.4960970878601074, + "loss_ce": 0.004397881217300892, + "loss_iou": 0.62109375, + "loss_num": 0.05029296875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 102322440, + "step": 1545 + }, + { + "epoch": 0.14470913090279403, + "grad_norm": 21.33815574645996, + "learning_rate": 5e-05, + "loss": 1.3602, + "num_input_tokens_seen": 102388084, + "step": 1546 + }, + { + "epoch": 0.14470913090279403, + "loss": 1.1827688217163086, + "loss_ce": 0.005522767081856728, + "loss_iou": 0.484375, + "loss_num": 0.04150390625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 102388084, + "step": 1546 + }, + { + "epoch": 0.14480273318668976, + "grad_norm": 42.99497604370117, + "learning_rate": 5e-05, + "loss": 1.6943, + "num_input_tokens_seen": 102454208, + "step": 1547 + }, + { + "epoch": 0.14480273318668976, + "loss": 1.6959104537963867, + "loss_ce": 0.003527591936290264, + "loss_iou": 0.71484375, + "loss_num": 0.052978515625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 102454208, + "step": 1547 + }, + { + "epoch": 0.14489633547058547, + "grad_norm": 33.810691833496094, + "learning_rate": 5e-05, + "loss": 1.7902, + "num_input_tokens_seen": 102519784, + "step": 1548 + }, + { + "epoch": 0.14489633547058547, + "loss": 1.601817011833191, + "loss_ce": 0.004649089649319649, + "loss_iou": 0.6953125, + "loss_num": 0.041748046875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 102519784, + "step": 1548 + }, + { + "epoch": 0.1449899377544812, + "grad_norm": 20.305252075195312, + "learning_rate": 5e-05, + "loss": 1.5246, + "num_input_tokens_seen": 102586960, + "step": 1549 + }, + { + "epoch": 0.1449899377544812, + "loss": 1.528454065322876, + "loss_ce": 0.0050166333094239235, + "loss_iou": 0.6875, + "loss_num": 0.0302734375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 102586960, + "step": 1549 + }, + { + "epoch": 0.14508354003837692, + "grad_norm": 15.906241416931152, + "learning_rate": 5e-05, + "loss": 1.3853, + "num_input_tokens_seen": 102654008, + "step": 1550 + }, + { + "epoch": 0.14508354003837692, + "loss": 1.4486968517303467, + "loss_ce": 0.0014311351114884019, + "loss_iou": 0.6171875, + "loss_num": 0.04248046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 102654008, + "step": 1550 + }, + { + "epoch": 0.14517714232227266, + "grad_norm": 18.253984451293945, + "learning_rate": 5e-05, + "loss": 1.2605, + "num_input_tokens_seen": 102720632, + "step": 1551 + }, + { + "epoch": 0.14517714232227266, + "loss": 1.1389744281768799, + "loss_ce": 0.0037205801345407963, + "loss_iou": 0.50390625, + "loss_num": 0.025146484375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 102720632, + "step": 1551 + }, + { + "epoch": 0.1452707446061684, + "grad_norm": 19.899873733520508, + "learning_rate": 5e-05, + "loss": 1.5484, + "num_input_tokens_seen": 102787532, + "step": 1552 + }, + { + "epoch": 0.1452707446061684, + "loss": 1.6164824962615967, + "loss_ce": 0.004177856259047985, + "loss_iou": 0.69921875, + "loss_num": 0.042236328125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 102787532, + "step": 1552 + }, + { + "epoch": 0.1453643468900641, + "grad_norm": 25.263181686401367, + "learning_rate": 5e-05, + "loss": 1.7018, + "num_input_tokens_seen": 102854264, + "step": 1553 + }, + { + "epoch": 0.1453643468900641, + "loss": 1.6837835311889648, + "loss_ce": 0.003119437489658594, + "loss_iou": 0.72265625, + "loss_num": 0.04736328125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 102854264, + "step": 1553 + }, + { + "epoch": 0.14545794917395985, + "grad_norm": 20.906625747680664, + "learning_rate": 5e-05, + "loss": 1.5819, + "num_input_tokens_seen": 102921292, + "step": 1554 + }, + { + "epoch": 0.14545794917395985, + "loss": 1.5744715929031372, + "loss_ce": 0.005135647486895323, + "loss_iou": 0.6953125, + "loss_num": 0.03662109375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 102921292, + "step": 1554 + }, + { + "epoch": 0.14555155145785556, + "grad_norm": 28.993274688720703, + "learning_rate": 5e-05, + "loss": 1.5309, + "num_input_tokens_seen": 102987228, + "step": 1555 + }, + { + "epoch": 0.14555155145785556, + "loss": 1.6445033550262451, + "loss_ce": 0.005343172233551741, + "loss_iou": 0.70703125, + "loss_num": 0.045166015625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 102987228, + "step": 1555 + }, + { + "epoch": 0.1456451537417513, + "grad_norm": 20.963346481323242, + "learning_rate": 5e-05, + "loss": 1.6217, + "num_input_tokens_seen": 103053636, + "step": 1556 + }, + { + "epoch": 0.1456451537417513, + "loss": 1.5372929573059082, + "loss_ce": 0.00604294566437602, + "loss_iou": 0.69140625, + "loss_num": 0.0303955078125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 103053636, + "step": 1556 + }, + { + "epoch": 0.14573875602564704, + "grad_norm": 19.246431350708008, + "learning_rate": 5e-05, + "loss": 1.4329, + "num_input_tokens_seen": 103119868, + "step": 1557 + }, + { + "epoch": 0.14573875602564704, + "loss": 1.3317689895629883, + "loss_ce": 0.005108893848955631, + "loss_iou": 0.58984375, + "loss_num": 0.0299072265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 103119868, + "step": 1557 + }, + { + "epoch": 0.14583235830954275, + "grad_norm": 23.084415435791016, + "learning_rate": 5e-05, + "loss": 1.2641, + "num_input_tokens_seen": 103185364, + "step": 1558 + }, + { + "epoch": 0.14583235830954275, + "loss": 1.3581790924072266, + "loss_ce": 0.006616611499339342, + "loss_iou": 0.59375, + "loss_num": 0.033447265625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 103185364, + "step": 1558 + }, + { + "epoch": 0.1459259605934385, + "grad_norm": 23.241357803344727, + "learning_rate": 5e-05, + "loss": 1.5636, + "num_input_tokens_seen": 103250600, + "step": 1559 + }, + { + "epoch": 0.1459259605934385, + "loss": 1.4889763593673706, + "loss_ce": 0.0070427171885967255, + "loss_iou": 0.6015625, + "loss_num": 0.05615234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 103250600, + "step": 1559 + }, + { + "epoch": 0.1460195628773342, + "grad_norm": 27.193466186523438, + "learning_rate": 5e-05, + "loss": 1.3414, + "num_input_tokens_seen": 103317144, + "step": 1560 + }, + { + "epoch": 0.1460195628773342, + "loss": 1.224779486656189, + "loss_ce": 0.002123260870575905, + "loss_iou": 0.578125, + "loss_num": 0.013427734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 103317144, + "step": 1560 + }, + { + "epoch": 0.14611316516122994, + "grad_norm": 26.30657196044922, + "learning_rate": 5e-05, + "loss": 1.7686, + "num_input_tokens_seen": 103383100, + "step": 1561 + }, + { + "epoch": 0.14611316516122994, + "loss": 1.762037754058838, + "loss_ce": 0.002272146288305521, + "loss_iou": 0.75, + "loss_num": 0.0517578125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 103383100, + "step": 1561 + }, + { + "epoch": 0.14620676744512567, + "grad_norm": 10.657283782958984, + "learning_rate": 5e-05, + "loss": 1.5322, + "num_input_tokens_seen": 103449580, + "step": 1562 + }, + { + "epoch": 0.14620676744512567, + "loss": 1.514688491821289, + "loss_ce": 0.004922892898321152, + "loss_iou": 0.60546875, + "loss_num": 0.059326171875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 103449580, + "step": 1562 + }, + { + "epoch": 0.14630036972902138, + "grad_norm": 17.780200958251953, + "learning_rate": 5e-05, + "loss": 1.3093, + "num_input_tokens_seen": 103515492, + "step": 1563 + }, + { + "epoch": 0.14630036972902138, + "loss": 1.493285894393921, + "loss_ce": 0.0020749655086547136, + "loss_iou": 0.6484375, + "loss_num": 0.0390625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 103515492, + "step": 1563 + }, + { + "epoch": 0.14639397201291712, + "grad_norm": 16.034446716308594, + "learning_rate": 5e-05, + "loss": 1.5579, + "num_input_tokens_seen": 103582312, + "step": 1564 + }, + { + "epoch": 0.14639397201291712, + "loss": 1.7352726459503174, + "loss_ce": 0.002850731136277318, + "loss_iou": 0.7578125, + "loss_num": 0.04296875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 103582312, + "step": 1564 + }, + { + "epoch": 0.14648757429681283, + "grad_norm": 21.146360397338867, + "learning_rate": 5e-05, + "loss": 1.5004, + "num_input_tokens_seen": 103648172, + "step": 1565 + }, + { + "epoch": 0.14648757429681283, + "loss": 1.412261962890625, + "loss_ce": 0.006988446228206158, + "loss_iou": 0.61328125, + "loss_num": 0.034912109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 103648172, + "step": 1565 + }, + { + "epoch": 0.14658117658070857, + "grad_norm": 84.80882263183594, + "learning_rate": 5e-05, + "loss": 1.4568, + "num_input_tokens_seen": 103713988, + "step": 1566 + }, + { + "epoch": 0.14658117658070857, + "loss": 1.646254539489746, + "loss_ce": 0.0036764289252460003, + "loss_iou": 0.7109375, + "loss_num": 0.04345703125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 103713988, + "step": 1566 + }, + { + "epoch": 0.14667477886460428, + "grad_norm": 15.519054412841797, + "learning_rate": 5e-05, + "loss": 1.5439, + "num_input_tokens_seen": 103780696, + "step": 1567 + }, + { + "epoch": 0.14667477886460428, + "loss": 1.5708928108215332, + "loss_ce": 0.006927861366420984, + "loss_iou": 0.62109375, + "loss_num": 0.064453125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 103780696, + "step": 1567 + }, + { + "epoch": 0.14676838114850002, + "grad_norm": 18.43244171142578, + "learning_rate": 5e-05, + "loss": 1.2816, + "num_input_tokens_seen": 103846680, + "step": 1568 + }, + { + "epoch": 0.14676838114850002, + "loss": 1.2332091331481934, + "loss_ce": 0.0032285857014358044, + "loss_iou": 0.5546875, + "loss_num": 0.0238037109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 103846680, + "step": 1568 + }, + { + "epoch": 0.14686198343239576, + "grad_norm": 21.82929801940918, + "learning_rate": 5e-05, + "loss": 1.3426, + "num_input_tokens_seen": 103912976, + "step": 1569 + }, + { + "epoch": 0.14686198343239576, + "loss": 1.288723111152649, + "loss_ce": 0.005642055068165064, + "loss_iou": 0.55859375, + "loss_num": 0.033203125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 103912976, + "step": 1569 + }, + { + "epoch": 0.14695558571629147, + "grad_norm": 38.64533996582031, + "learning_rate": 5e-05, + "loss": 1.554, + "num_input_tokens_seen": 103980544, + "step": 1570 + }, + { + "epoch": 0.14695558571629147, + "loss": 1.687143325805664, + "loss_ce": 0.0025730598717927933, + "loss_iou": 0.74609375, + "loss_num": 0.0390625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 103980544, + "step": 1570 + }, + { + "epoch": 0.1470491880001872, + "grad_norm": 16.56947135925293, + "learning_rate": 5e-05, + "loss": 1.6906, + "num_input_tokens_seen": 104046316, + "step": 1571 + }, + { + "epoch": 0.1470491880001872, + "loss": 1.5583475828170776, + "loss_ce": 0.007078057155013084, + "loss_iou": 0.6640625, + "loss_num": 0.04345703125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 104046316, + "step": 1571 + }, + { + "epoch": 0.14714279028408292, + "grad_norm": 44.03985595703125, + "learning_rate": 5e-05, + "loss": 1.4482, + "num_input_tokens_seen": 104112700, + "step": 1572 + }, + { + "epoch": 0.14714279028408292, + "loss": 1.4707307815551758, + "loss_ce": 0.0017366930842399597, + "loss_iou": 0.6015625, + "loss_num": 0.052978515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 104112700, + "step": 1572 + }, + { + "epoch": 0.14723639256797866, + "grad_norm": 22.382993698120117, + "learning_rate": 5e-05, + "loss": 1.4796, + "num_input_tokens_seen": 104179164, + "step": 1573 + }, + { + "epoch": 0.14723639256797866, + "loss": 1.465185523033142, + "loss_ce": 0.004736325703561306, + "loss_iou": 0.609375, + "loss_num": 0.049072265625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 104179164, + "step": 1573 + }, + { + "epoch": 0.1473299948518744, + "grad_norm": 31.945423126220703, + "learning_rate": 5e-05, + "loss": 1.8342, + "num_input_tokens_seen": 104245728, + "step": 1574 + }, + { + "epoch": 0.1473299948518744, + "loss": 1.7808973789215088, + "loss_ce": 0.0035536657087504864, + "loss_iou": 0.75, + "loss_num": 0.0556640625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 104245728, + "step": 1574 + }, + { + "epoch": 0.1474235971357701, + "grad_norm": 165.14805603027344, + "learning_rate": 5e-05, + "loss": 1.1913, + "num_input_tokens_seen": 104311864, + "step": 1575 + }, + { + "epoch": 0.1474235971357701, + "loss": 1.2625303268432617, + "loss_ce": 0.007708542514592409, + "loss_iou": 0.48828125, + "loss_num": 0.055908203125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 104311864, + "step": 1575 + }, + { + "epoch": 0.14751719941966585, + "grad_norm": 30.921964645385742, + "learning_rate": 5e-05, + "loss": 1.2002, + "num_input_tokens_seen": 104378064, + "step": 1576 + }, + { + "epoch": 0.14751719941966585, + "loss": 1.21901273727417, + "loss_ce": 0.002215783577412367, + "loss_iou": 0.51171875, + "loss_num": 0.039306640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 104378064, + "step": 1576 + }, + { + "epoch": 0.14761080170356156, + "grad_norm": 29.67722511291504, + "learning_rate": 5e-05, + "loss": 1.4842, + "num_input_tokens_seen": 104444520, + "step": 1577 + }, + { + "epoch": 0.14761080170356156, + "loss": 1.4530389308929443, + "loss_ce": 0.006749823689460754, + "loss_iou": 0.58984375, + "loss_num": 0.052978515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 104444520, + "step": 1577 + }, + { + "epoch": 0.1477044039874573, + "grad_norm": 33.292762756347656, + "learning_rate": 5e-05, + "loss": 1.4454, + "num_input_tokens_seen": 104510820, + "step": 1578 + }, + { + "epoch": 0.1477044039874573, + "loss": 1.3860068321228027, + "loss_ce": 0.004048776812851429, + "loss_iou": 0.62890625, + "loss_num": 0.0255126953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 104510820, + "step": 1578 + }, + { + "epoch": 0.14779800627135303, + "grad_norm": 27.22444725036621, + "learning_rate": 5e-05, + "loss": 1.6401, + "num_input_tokens_seen": 104576180, + "step": 1579 + }, + { + "epoch": 0.14779800627135303, + "loss": 1.614316463470459, + "loss_ce": 0.003720806445926428, + "loss_iou": 0.6796875, + "loss_num": 0.051513671875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 104576180, + "step": 1579 + }, + { + "epoch": 0.14789160855524874, + "grad_norm": 18.634841918945312, + "learning_rate": 5e-05, + "loss": 1.6994, + "num_input_tokens_seen": 104642568, + "step": 1580 + }, + { + "epoch": 0.14789160855524874, + "loss": 1.7484934329986572, + "loss_ce": 0.004352916032075882, + "loss_iou": 0.72265625, + "loss_num": 0.060302734375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 104642568, + "step": 1580 + }, + { + "epoch": 0.14798521083914448, + "grad_norm": 17.323060989379883, + "learning_rate": 5e-05, + "loss": 1.4623, + "num_input_tokens_seen": 104709464, + "step": 1581 + }, + { + "epoch": 0.14798521083914448, + "loss": 1.5736188888549805, + "loss_ce": 0.005259455647319555, + "loss_iou": 0.65625, + "loss_num": 0.051513671875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 104709464, + "step": 1581 + }, + { + "epoch": 0.1480788131230402, + "grad_norm": 23.767314910888672, + "learning_rate": 5e-05, + "loss": 1.6097, + "num_input_tokens_seen": 104776000, + "step": 1582 + }, + { + "epoch": 0.1480788131230402, + "loss": 1.6807904243469238, + "loss_ce": 0.003056036541238427, + "loss_iou": 0.70703125, + "loss_num": 0.052978515625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 104776000, + "step": 1582 + }, + { + "epoch": 0.14817241540693593, + "grad_norm": 23.845067977905273, + "learning_rate": 5e-05, + "loss": 1.7156, + "num_input_tokens_seen": 104842072, + "step": 1583 + }, + { + "epoch": 0.14817241540693593, + "loss": 1.6568188667297363, + "loss_ce": 0.004475062713027, + "loss_iou": 0.671875, + "loss_num": 0.061767578125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 104842072, + "step": 1583 + }, + { + "epoch": 0.14826601769083167, + "grad_norm": 14.527493476867676, + "learning_rate": 5e-05, + "loss": 1.7741, + "num_input_tokens_seen": 104908084, + "step": 1584 + }, + { + "epoch": 0.14826601769083167, + "loss": 2.057866334915161, + "loss_ce": 0.004155375994741917, + "loss_iou": 0.80859375, + "loss_num": 0.0869140625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 104908084, + "step": 1584 + }, + { + "epoch": 0.14835961997472738, + "grad_norm": 18.82594871520996, + "learning_rate": 5e-05, + "loss": 1.4649, + "num_input_tokens_seen": 104973872, + "step": 1585 + }, + { + "epoch": 0.14835961997472738, + "loss": 1.4868476390838623, + "loss_ce": 0.008698174729943275, + "loss_iou": 0.60546875, + "loss_num": 0.052978515625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 104973872, + "step": 1585 + }, + { + "epoch": 0.14845322225862312, + "grad_norm": 35.864688873291016, + "learning_rate": 5e-05, + "loss": 1.5298, + "num_input_tokens_seen": 105039740, + "step": 1586 + }, + { + "epoch": 0.14845322225862312, + "loss": 1.4475951194763184, + "loss_ce": 0.002038555219769478, + "loss_iou": 0.6171875, + "loss_num": 0.04296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 105039740, + "step": 1586 + }, + { + "epoch": 0.14854682454251883, + "grad_norm": 25.220077514648438, + "learning_rate": 5e-05, + "loss": 1.9283, + "num_input_tokens_seen": 105105680, + "step": 1587 + }, + { + "epoch": 0.14854682454251883, + "loss": 1.9239721298217773, + "loss_ce": 0.003073735162615776, + "loss_iou": 0.78515625, + "loss_num": 0.06982421875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 105105680, + "step": 1587 + }, + { + "epoch": 0.14864042682641457, + "grad_norm": 28.7579288482666, + "learning_rate": 5e-05, + "loss": 1.4026, + "num_input_tokens_seen": 105172340, + "step": 1588 + }, + { + "epoch": 0.14864042682641457, + "loss": 1.445113182067871, + "loss_ce": 0.0037068475503474474, + "loss_iou": 0.5859375, + "loss_num": 0.05419921875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 105172340, + "step": 1588 + }, + { + "epoch": 0.14873402911031028, + "grad_norm": 22.010601043701172, + "learning_rate": 5e-05, + "loss": 1.339, + "num_input_tokens_seen": 105238736, + "step": 1589 + }, + { + "epoch": 0.14873402911031028, + "loss": 1.2648894786834717, + "loss_ce": 0.004635551478713751, + "loss_iou": 0.53515625, + "loss_num": 0.038330078125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 105238736, + "step": 1589 + }, + { + "epoch": 0.14882763139420602, + "grad_norm": 21.327199935913086, + "learning_rate": 5e-05, + "loss": 1.7862, + "num_input_tokens_seen": 105305376, + "step": 1590 + }, + { + "epoch": 0.14882763139420602, + "loss": 1.929335594177246, + "loss_ce": 0.0055074915289878845, + "loss_iou": 0.78515625, + "loss_num": 0.0712890625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 105305376, + "step": 1590 + }, + { + "epoch": 0.14892123367810176, + "grad_norm": 32.1934928894043, + "learning_rate": 5e-05, + "loss": 1.2378, + "num_input_tokens_seen": 105370324, + "step": 1591 + }, + { + "epoch": 0.14892123367810176, + "loss": 1.1679826974868774, + "loss_ce": 0.007628083229064941, + "loss_iou": 0.4609375, + "loss_num": 0.04736328125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 105370324, + "step": 1591 + }, + { + "epoch": 0.14901483596199747, + "grad_norm": 16.672101974487305, + "learning_rate": 5e-05, + "loss": 1.7804, + "num_input_tokens_seen": 105437024, + "step": 1592 + }, + { + "epoch": 0.14901483596199747, + "loss": 1.6345713138580322, + "loss_ce": 0.010547962039709091, + "loss_iou": 0.71875, + "loss_num": 0.037841796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 105437024, + "step": 1592 + }, + { + "epoch": 0.1491084382458932, + "grad_norm": 20.593778610229492, + "learning_rate": 5e-05, + "loss": 1.4119, + "num_input_tokens_seen": 105502924, + "step": 1593 + }, + { + "epoch": 0.1491084382458932, + "loss": 1.3221817016601562, + "loss_ce": 0.004798830486834049, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 105502924, + "step": 1593 + }, + { + "epoch": 0.14920204052978892, + "grad_norm": 41.04405975341797, + "learning_rate": 5e-05, + "loss": 1.3331, + "num_input_tokens_seen": 105569204, + "step": 1594 + }, + { + "epoch": 0.14920204052978892, + "loss": 1.1906969547271729, + "loss_ce": 0.003196998964995146, + "loss_iou": 0.51171875, + "loss_num": 0.032470703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 105569204, + "step": 1594 + }, + { + "epoch": 0.14929564281368465, + "grad_norm": 16.740083694458008, + "learning_rate": 5e-05, + "loss": 1.9196, + "num_input_tokens_seen": 105634912, + "step": 1595 + }, + { + "epoch": 0.14929564281368465, + "loss": 1.811830759048462, + "loss_ce": 0.00226047751493752, + "loss_iou": 0.8125, + "loss_num": 0.0361328125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 105634912, + "step": 1595 + }, + { + "epoch": 0.1493892450975804, + "grad_norm": 20.989532470703125, + "learning_rate": 5e-05, + "loss": 1.5249, + "num_input_tokens_seen": 105701448, + "step": 1596 + }, + { + "epoch": 0.1493892450975804, + "loss": 1.7201259136199951, + "loss_ce": 0.01016505528241396, + "loss_iou": 0.6640625, + "loss_num": 0.07666015625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 105701448, + "step": 1596 + }, + { + "epoch": 0.1494828473814761, + "grad_norm": 25.692968368530273, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 105768228, + "step": 1597 + }, + { + "epoch": 0.1494828473814761, + "loss": 1.401670217514038, + "loss_ce": 0.0022561508230865, + "loss_iou": 0.58984375, + "loss_num": 0.043701171875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 105768228, + "step": 1597 + }, + { + "epoch": 0.14957644966537184, + "grad_norm": 85.5755386352539, + "learning_rate": 5e-05, + "loss": 1.4141, + "num_input_tokens_seen": 105834624, + "step": 1598 + }, + { + "epoch": 0.14957644966537184, + "loss": 1.4686968326568604, + "loss_ce": 0.0058061955496668816, + "loss_iou": 0.6484375, + "loss_num": 0.03369140625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 105834624, + "step": 1598 + }, + { + "epoch": 0.14967005194926755, + "grad_norm": 15.782243728637695, + "learning_rate": 5e-05, + "loss": 1.3049, + "num_input_tokens_seen": 105899836, + "step": 1599 + }, + { + "epoch": 0.14967005194926755, + "loss": 1.0485551357269287, + "loss_ce": 0.005464324727654457, + "loss_iou": 0.408203125, + "loss_num": 0.045654296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 105899836, + "step": 1599 + }, + { + "epoch": 0.1497636542331633, + "grad_norm": 102.48106384277344, + "learning_rate": 5e-05, + "loss": 1.4776, + "num_input_tokens_seen": 105965848, + "step": 1600 + }, + { + "epoch": 0.1497636542331633, + "loss": 1.5239989757537842, + "loss_ce": 0.004467761144042015, + "loss_iou": 0.640625, + "loss_num": 0.046630859375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 105965848, + "step": 1600 + }, + { + "epoch": 0.14985725651705903, + "grad_norm": 29.419294357299805, + "learning_rate": 5e-05, + "loss": 1.4311, + "num_input_tokens_seen": 106032240, + "step": 1601 + }, + { + "epoch": 0.14985725651705903, + "loss": 1.4856876134872437, + "loss_ce": 0.00424232380464673, + "loss_iou": 0.6328125, + "loss_num": 0.043701171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 106032240, + "step": 1601 + }, + { + "epoch": 0.14995085880095474, + "grad_norm": 18.31100845336914, + "learning_rate": 5e-05, + "loss": 1.4293, + "num_input_tokens_seen": 106097984, + "step": 1602 + }, + { + "epoch": 0.14995085880095474, + "loss": 1.3015904426574707, + "loss_ce": 0.005692100618034601, + "loss_iou": 0.58203125, + "loss_num": 0.025634765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 106097984, + "step": 1602 + }, + { + "epoch": 0.15004446108485048, + "grad_norm": 16.438608169555664, + "learning_rate": 5e-05, + "loss": 1.3127, + "num_input_tokens_seen": 106163944, + "step": 1603 + }, + { + "epoch": 0.15004446108485048, + "loss": 1.2673999071121216, + "loss_ce": 0.008610883727669716, + "loss_iou": 0.490234375, + "loss_num": 0.0556640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 106163944, + "step": 1603 + }, + { + "epoch": 0.1501380633687462, + "grad_norm": 19.66848373413086, + "learning_rate": 5e-05, + "loss": 1.44, + "num_input_tokens_seen": 106230308, + "step": 1604 + }, + { + "epoch": 0.1501380633687462, + "loss": 1.5320746898651123, + "loss_ce": 0.001801289152354002, + "loss_iou": 0.65625, + "loss_num": 0.04443359375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 106230308, + "step": 1604 + }, + { + "epoch": 0.15023166565264193, + "grad_norm": 24.82718276977539, + "learning_rate": 5e-05, + "loss": 1.6418, + "num_input_tokens_seen": 106296544, + "step": 1605 + }, + { + "epoch": 0.15023166565264193, + "loss": 1.6388241052627563, + "loss_ce": 0.006988205946981907, + "loss_iou": 0.7109375, + "loss_num": 0.042724609375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 106296544, + "step": 1605 + }, + { + "epoch": 0.15032526793653767, + "grad_norm": 21.777324676513672, + "learning_rate": 5e-05, + "loss": 1.6035, + "num_input_tokens_seen": 106363548, + "step": 1606 + }, + { + "epoch": 0.15032526793653767, + "loss": 1.555410385131836, + "loss_ce": 0.003652624785900116, + "loss_iou": 0.69140625, + "loss_num": 0.03369140625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 106363548, + "step": 1606 + }, + { + "epoch": 0.15041887022043338, + "grad_norm": 55.833648681640625, + "learning_rate": 5e-05, + "loss": 1.3487, + "num_input_tokens_seen": 106428844, + "step": 1607 + }, + { + "epoch": 0.15041887022043338, + "loss": 1.3498376607894897, + "loss_ce": 0.00279181613586843, + "loss_iou": 0.5625, + "loss_num": 0.044921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 106428844, + "step": 1607 + }, + { + "epoch": 0.15051247250432911, + "grad_norm": 27.31374168395996, + "learning_rate": 5e-05, + "loss": 1.6015, + "num_input_tokens_seen": 106494660, + "step": 1608 + }, + { + "epoch": 0.15051247250432911, + "loss": 1.735952377319336, + "loss_ce": 0.005483636632561684, + "loss_iou": 0.7109375, + "loss_num": 0.0615234375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 106494660, + "step": 1608 + }, + { + "epoch": 0.15060607478822483, + "grad_norm": 38.36038589477539, + "learning_rate": 5e-05, + "loss": 1.4511, + "num_input_tokens_seen": 106560668, + "step": 1609 + }, + { + "epoch": 0.15060607478822483, + "loss": 1.3938336372375488, + "loss_ce": 0.006870659068226814, + "loss_iou": 0.56640625, + "loss_num": 0.05078125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 106560668, + "step": 1609 + }, + { + "epoch": 0.15069967707212056, + "grad_norm": 18.214885711669922, + "learning_rate": 5e-05, + "loss": 1.7583, + "num_input_tokens_seen": 106627344, + "step": 1610 + }, + { + "epoch": 0.15069967707212056, + "loss": 1.8072059154510498, + "loss_ce": 0.002518388442695141, + "loss_iou": 0.8046875, + "loss_num": 0.038818359375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 106627344, + "step": 1610 + }, + { + "epoch": 0.15079327935601627, + "grad_norm": 13.350485801696777, + "learning_rate": 5e-05, + "loss": 1.4211, + "num_input_tokens_seen": 106693476, + "step": 1611 + }, + { + "epoch": 0.15079327935601627, + "loss": 1.3665543794631958, + "loss_ce": 0.006691177375614643, + "loss_iou": 0.5703125, + "loss_num": 0.043701171875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 106693476, + "step": 1611 + }, + { + "epoch": 0.150886881639912, + "grad_norm": 17.282957077026367, + "learning_rate": 5e-05, + "loss": 1.4673, + "num_input_tokens_seen": 106760160, + "step": 1612 + }, + { + "epoch": 0.150886881639912, + "loss": 1.548656702041626, + "loss_ce": 0.004711356945335865, + "loss_iou": 0.6328125, + "loss_num": 0.054931640625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 106760160, + "step": 1612 + }, + { + "epoch": 0.15098048392380775, + "grad_norm": 23.995521545410156, + "learning_rate": 5e-05, + "loss": 1.6054, + "num_input_tokens_seen": 106825812, + "step": 1613 + }, + { + "epoch": 0.15098048392380775, + "loss": 1.6569788455963135, + "loss_ce": 0.005611698143184185, + "loss_iou": 0.6796875, + "loss_num": 0.05859375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 106825812, + "step": 1613 + }, + { + "epoch": 0.15107408620770346, + "grad_norm": 55.08814239501953, + "learning_rate": 5e-05, + "loss": 1.5432, + "num_input_tokens_seen": 106892084, + "step": 1614 + }, + { + "epoch": 0.15107408620770346, + "loss": 1.3321701288223267, + "loss_ce": 0.002091982401907444, + "loss_iou": 0.6015625, + "loss_num": 0.0247802734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 106892084, + "step": 1614 + }, + { + "epoch": 0.1511676884915992, + "grad_norm": 17.88591766357422, + "learning_rate": 5e-05, + "loss": 1.5182, + "num_input_tokens_seen": 106958200, + "step": 1615 + }, + { + "epoch": 0.1511676884915992, + "loss": 1.5853595733642578, + "loss_ce": 0.0052814362570643425, + "loss_iou": 0.6171875, + "loss_num": 0.068359375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 106958200, + "step": 1615 + }, + { + "epoch": 0.1512612907754949, + "grad_norm": 30.752655029296875, + "learning_rate": 5e-05, + "loss": 1.4185, + "num_input_tokens_seen": 107023892, + "step": 1616 + }, + { + "epoch": 0.1512612907754949, + "loss": 1.542198896408081, + "loss_ce": 0.008019291795790195, + "loss_iou": 0.640625, + "loss_num": 0.05078125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 107023892, + "step": 1616 + }, + { + "epoch": 0.15135489305939065, + "grad_norm": 22.571989059448242, + "learning_rate": 5e-05, + "loss": 1.4278, + "num_input_tokens_seen": 107091452, + "step": 1617 + }, + { + "epoch": 0.15135489305939065, + "loss": 1.383119821548462, + "loss_ce": 0.005922477692365646, + "loss_iou": 0.58984375, + "loss_num": 0.039794921875, + "loss_xval": 1.375, + "num_input_tokens_seen": 107091452, + "step": 1617 + }, + { + "epoch": 0.1514484953432864, + "grad_norm": 20.350555419921875, + "learning_rate": 5e-05, + "loss": 1.4435, + "num_input_tokens_seen": 107157924, + "step": 1618 + }, + { + "epoch": 0.1514484953432864, + "loss": 1.6578073501586914, + "loss_ce": 0.003998872824013233, + "loss_iou": 0.69921875, + "loss_num": 0.05126953125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 107157924, + "step": 1618 + }, + { + "epoch": 0.1515420976271821, + "grad_norm": 33.785484313964844, + "learning_rate": 5e-05, + "loss": 1.5386, + "num_input_tokens_seen": 107224524, + "step": 1619 + }, + { + "epoch": 0.1515420976271821, + "loss": 1.577425479888916, + "loss_ce": 0.00808954332023859, + "loss_iou": 0.65234375, + "loss_num": 0.052734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 107224524, + "step": 1619 + }, + { + "epoch": 0.15163569991107784, + "grad_norm": 20.362022399902344, + "learning_rate": 5e-05, + "loss": 1.6265, + "num_input_tokens_seen": 107290996, + "step": 1620 + }, + { + "epoch": 0.15163569991107784, + "loss": 1.378639578819275, + "loss_ce": 0.00315127894282341, + "loss_iou": 0.63671875, + "loss_num": 0.02099609375, + "loss_xval": 1.375, + "num_input_tokens_seen": 107290996, + "step": 1620 + }, + { + "epoch": 0.15172930219497355, + "grad_norm": 30.693668365478516, + "learning_rate": 5e-05, + "loss": 1.2258, + "num_input_tokens_seen": 107357484, + "step": 1621 + }, + { + "epoch": 0.15172930219497355, + "loss": 1.1909761428833008, + "loss_ce": 0.005429234355688095, + "loss_iou": 0.5234375, + "loss_num": 0.0283203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 107357484, + "step": 1621 + }, + { + "epoch": 0.15182290447886929, + "grad_norm": 26.40045738220215, + "learning_rate": 5e-05, + "loss": 1.4304, + "num_input_tokens_seen": 107424000, + "step": 1622 + }, + { + "epoch": 0.15182290447886929, + "loss": 1.2539054155349731, + "loss_ce": 0.006163769401609898, + "loss_iou": 0.46875, + "loss_num": 0.06201171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 107424000, + "step": 1622 + }, + { + "epoch": 0.15191650676276502, + "grad_norm": 17.566761016845703, + "learning_rate": 5e-05, + "loss": 1.2579, + "num_input_tokens_seen": 107490224, + "step": 1623 + }, + { + "epoch": 0.15191650676276502, + "loss": 1.2542431354522705, + "loss_ce": 0.004243158735334873, + "loss_iou": 0.53515625, + "loss_num": 0.035400390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 107490224, + "step": 1623 + }, + { + "epoch": 0.15201010904666074, + "grad_norm": 27.513626098632812, + "learning_rate": 5e-05, + "loss": 1.4749, + "num_input_tokens_seen": 107556124, + "step": 1624 + }, + { + "epoch": 0.15201010904666074, + "loss": 1.5368672609329224, + "loss_ce": 0.006593812257051468, + "loss_iou": 0.65625, + "loss_num": 0.0439453125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 107556124, + "step": 1624 + }, + { + "epoch": 0.15210371133055647, + "grad_norm": 41.309940338134766, + "learning_rate": 5e-05, + "loss": 1.357, + "num_input_tokens_seen": 107621688, + "step": 1625 + }, + { + "epoch": 0.15210371133055647, + "loss": 1.1321182250976562, + "loss_ce": 0.0032120319083333015, + "loss_iou": 0.50390625, + "loss_num": 0.024169921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 107621688, + "step": 1625 + }, + { + "epoch": 0.15219731361445218, + "grad_norm": 16.765777587890625, + "learning_rate": 5e-05, + "loss": 1.6151, + "num_input_tokens_seen": 107687820, + "step": 1626 + }, + { + "epoch": 0.15219731361445218, + "loss": 1.7323691844940186, + "loss_ce": 0.0053183650597929955, + "loss_iou": 0.73828125, + "loss_num": 0.04931640625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 107687820, + "step": 1626 + }, + { + "epoch": 0.15229091589834792, + "grad_norm": 17.608959197998047, + "learning_rate": 5e-05, + "loss": 1.448, + "num_input_tokens_seen": 107753892, + "step": 1627 + }, + { + "epoch": 0.15229091589834792, + "loss": 1.3993690013885498, + "loss_ce": 0.0014197976561263204, + "loss_iou": 0.609375, + "loss_num": 0.03564453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 107753892, + "step": 1627 + }, + { + "epoch": 0.15238451818224363, + "grad_norm": 18.415164947509766, + "learning_rate": 5e-05, + "loss": 1.4588, + "num_input_tokens_seen": 107818704, + "step": 1628 + }, + { + "epoch": 0.15238451818224363, + "loss": 1.503772497177124, + "loss_ce": 0.004505004268139601, + "loss_iou": 0.59765625, + "loss_num": 0.06005859375, + "loss_xval": 1.5, + "num_input_tokens_seen": 107818704, + "step": 1628 + }, + { + "epoch": 0.15247812046613937, + "grad_norm": 29.39680290222168, + "learning_rate": 5e-05, + "loss": 1.5631, + "num_input_tokens_seen": 107885208, + "step": 1629 + }, + { + "epoch": 0.15247812046613937, + "loss": 1.4180433750152588, + "loss_ce": 0.009107757359743118, + "loss_iou": 0.59375, + "loss_num": 0.044921875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 107885208, + "step": 1629 + }, + { + "epoch": 0.1525717227500351, + "grad_norm": 13.992585182189941, + "learning_rate": 5e-05, + "loss": 1.343, + "num_input_tokens_seen": 107949688, + "step": 1630 + }, + { + "epoch": 0.1525717227500351, + "loss": 1.2554576396942139, + "loss_ce": 0.003992866724729538, + "loss_iou": 0.5390625, + "loss_num": 0.03466796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 107949688, + "step": 1630 + }, + { + "epoch": 0.15266532503393082, + "grad_norm": 21.308378219604492, + "learning_rate": 5e-05, + "loss": 1.4879, + "num_input_tokens_seen": 108015600, + "step": 1631 + }, + { + "epoch": 0.15266532503393082, + "loss": 1.3239504098892212, + "loss_ce": 0.0021730849985033274, + "loss_iou": 0.58203125, + "loss_num": 0.031005859375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 108015600, + "step": 1631 + }, + { + "epoch": 0.15275892731782656, + "grad_norm": 28.54804801940918, + "learning_rate": 5e-05, + "loss": 1.5631, + "num_input_tokens_seen": 108081240, + "step": 1632 + }, + { + "epoch": 0.15275892731782656, + "loss": 1.6782734394073486, + "loss_ce": 0.006398522295057774, + "loss_iou": 0.7109375, + "loss_num": 0.0498046875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 108081240, + "step": 1632 + }, + { + "epoch": 0.15285252960172227, + "grad_norm": 34.5693473815918, + "learning_rate": 5e-05, + "loss": 1.6822, + "num_input_tokens_seen": 108148780, + "step": 1633 + }, + { + "epoch": 0.15285252960172227, + "loss": 1.7861237525939941, + "loss_ce": 0.006826853379607201, + "loss_iou": 0.734375, + "loss_num": 0.06201171875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 108148780, + "step": 1633 + }, + { + "epoch": 0.152946131885618, + "grad_norm": 17.04439353942871, + "learning_rate": 5e-05, + "loss": 1.5437, + "num_input_tokens_seen": 108215492, + "step": 1634 + }, + { + "epoch": 0.152946131885618, + "loss": 1.4546854496002197, + "loss_ce": 0.00351364491507411, + "loss_iou": 0.60546875, + "loss_num": 0.047119140625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 108215492, + "step": 1634 + }, + { + "epoch": 0.15303973416951375, + "grad_norm": 21.455020904541016, + "learning_rate": 5e-05, + "loss": 1.5011, + "num_input_tokens_seen": 108281208, + "step": 1635 + }, + { + "epoch": 0.15303973416951375, + "loss": 1.515901803970337, + "loss_ce": 0.005037506110966206, + "loss_iou": 0.65234375, + "loss_num": 0.04150390625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 108281208, + "step": 1635 + }, + { + "epoch": 0.15313333645340946, + "grad_norm": 17.235258102416992, + "learning_rate": 5e-05, + "loss": 1.5224, + "num_input_tokens_seen": 108346920, + "step": 1636 + }, + { + "epoch": 0.15313333645340946, + "loss": 1.5439879894256592, + "loss_ce": 0.0019958079792559147, + "loss_iou": 0.62890625, + "loss_num": 0.057373046875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 108346920, + "step": 1636 + }, + { + "epoch": 0.1532269387373052, + "grad_norm": 18.620927810668945, + "learning_rate": 5e-05, + "loss": 1.2254, + "num_input_tokens_seen": 108413068, + "step": 1637 + }, + { + "epoch": 0.1532269387373052, + "loss": 1.2213191986083984, + "loss_ce": 0.0035457073245197535, + "loss_iou": 0.51953125, + "loss_num": 0.035400390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 108413068, + "step": 1637 + }, + { + "epoch": 0.1533205410212009, + "grad_norm": 19.51734733581543, + "learning_rate": 5e-05, + "loss": 1.5715, + "num_input_tokens_seen": 108478264, + "step": 1638 + }, + { + "epoch": 0.1533205410212009, + "loss": 1.7905712127685547, + "loss_ce": 0.005414885934442282, + "loss_iou": 0.765625, + "loss_num": 0.0517578125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 108478264, + "step": 1638 + }, + { + "epoch": 0.15341414330509665, + "grad_norm": 21.999000549316406, + "learning_rate": 5e-05, + "loss": 1.3867, + "num_input_tokens_seen": 108543548, + "step": 1639 + }, + { + "epoch": 0.15341414330509665, + "loss": 1.2881088256835938, + "loss_ce": 0.00930020771920681, + "loss_iou": 0.5078125, + "loss_num": 0.052978515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 108543548, + "step": 1639 + }, + { + "epoch": 0.15350774558899238, + "grad_norm": 33.908164978027344, + "learning_rate": 5e-05, + "loss": 1.3126, + "num_input_tokens_seen": 108609232, + "step": 1640 + }, + { + "epoch": 0.15350774558899238, + "loss": 1.3487985134124756, + "loss_ce": 0.006024984642863274, + "loss_iou": 0.58984375, + "loss_num": 0.03173828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 108609232, + "step": 1640 + }, + { + "epoch": 0.1536013478728881, + "grad_norm": 38.1673698425293, + "learning_rate": 5e-05, + "loss": 1.7509, + "num_input_tokens_seen": 108675260, + "step": 1641 + }, + { + "epoch": 0.1536013478728881, + "loss": 1.6148765087127686, + "loss_ce": 0.006478075869381428, + "loss_iou": 0.7265625, + "loss_num": 0.03173828125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 108675260, + "step": 1641 + }, + { + "epoch": 0.15369495015678383, + "grad_norm": 21.1893253326416, + "learning_rate": 5e-05, + "loss": 1.2581, + "num_input_tokens_seen": 108741472, + "step": 1642 + }, + { + "epoch": 0.15369495015678383, + "loss": 1.244871735572815, + "loss_ce": 0.0012193932197988033, + "loss_iou": 0.55859375, + "loss_num": 0.0252685546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 108741472, + "step": 1642 + }, + { + "epoch": 0.15378855244067954, + "grad_norm": 31.65707015991211, + "learning_rate": 5e-05, + "loss": 1.3797, + "num_input_tokens_seen": 108807840, + "step": 1643 + }, + { + "epoch": 0.15378855244067954, + "loss": 1.431541919708252, + "loss_ce": 0.004784135147929192, + "loss_iou": 0.6171875, + "loss_num": 0.0390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 108807840, + "step": 1643 + }, + { + "epoch": 0.15388215472457528, + "grad_norm": 34.68901824951172, + "learning_rate": 5e-05, + "loss": 1.4691, + "num_input_tokens_seen": 108874196, + "step": 1644 + }, + { + "epoch": 0.15388215472457528, + "loss": 1.3026742935180664, + "loss_ce": 0.007264128886163235, + "loss_iou": 0.5703125, + "loss_num": 0.031494140625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 108874196, + "step": 1644 + }, + { + "epoch": 0.15397575700847102, + "grad_norm": 17.457021713256836, + "learning_rate": 5e-05, + "loss": 1.6578, + "num_input_tokens_seen": 108940340, + "step": 1645 + }, + { + "epoch": 0.15397575700847102, + "loss": 1.639525294303894, + "loss_ce": 0.0042713712900877, + "loss_iou": 0.69140625, + "loss_num": 0.051025390625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 108940340, + "step": 1645 + }, + { + "epoch": 0.15406935929236673, + "grad_norm": 18.910030364990234, + "learning_rate": 5e-05, + "loss": 1.3429, + "num_input_tokens_seen": 109005824, + "step": 1646 + }, + { + "epoch": 0.15406935929236673, + "loss": 1.312802791595459, + "loss_ce": 0.004453194327652454, + "loss_iou": 0.5546875, + "loss_num": 0.039794921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 109005824, + "step": 1646 + }, + { + "epoch": 0.15416296157626247, + "grad_norm": 22.063074111938477, + "learning_rate": 5e-05, + "loss": 1.3949, + "num_input_tokens_seen": 109071708, + "step": 1647 + }, + { + "epoch": 0.15416296157626247, + "loss": 1.3934065103530884, + "loss_ce": 0.0057111382484436035, + "loss_iou": 0.6015625, + "loss_num": 0.037353515625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 109071708, + "step": 1647 + }, + { + "epoch": 0.15425656386015818, + "grad_norm": 19.004655838012695, + "learning_rate": 5e-05, + "loss": 1.6076, + "num_input_tokens_seen": 109138780, + "step": 1648 + }, + { + "epoch": 0.15425656386015818, + "loss": 1.6425758600234985, + "loss_ce": 0.0063453614711761475, + "loss_iou": 0.671875, + "loss_num": 0.05908203125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 109138780, + "step": 1648 + }, + { + "epoch": 0.15435016614405392, + "grad_norm": 23.013456344604492, + "learning_rate": 5e-05, + "loss": 1.7906, + "num_input_tokens_seen": 109204364, + "step": 1649 + }, + { + "epoch": 0.15435016614405392, + "loss": 1.803839087486267, + "loss_ce": 0.007940629497170448, + "loss_iou": 0.7421875, + "loss_num": 0.062255859375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 109204364, + "step": 1649 + }, + { + "epoch": 0.15444376842794963, + "grad_norm": 20.71993064880371, + "learning_rate": 5e-05, + "loss": 1.3997, + "num_input_tokens_seen": 109270792, + "step": 1650 + }, + { + "epoch": 0.15444376842794963, + "loss": 1.245593547821045, + "loss_ce": 0.008777073584496975, + "loss_iou": 0.53125, + "loss_num": 0.034423828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 109270792, + "step": 1650 + }, + { + "epoch": 0.15453737071184537, + "grad_norm": 24.68073081970215, + "learning_rate": 5e-05, + "loss": 1.5782, + "num_input_tokens_seen": 109336160, + "step": 1651 + }, + { + "epoch": 0.15453737071184537, + "loss": 1.6002860069274902, + "loss_ce": 0.0073905158787965775, + "loss_iou": 0.6484375, + "loss_num": 0.06005859375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 109336160, + "step": 1651 + }, + { + "epoch": 0.1546309729957411, + "grad_norm": 23.734413146972656, + "learning_rate": 5e-05, + "loss": 1.5779, + "num_input_tokens_seen": 109402588, + "step": 1652 + }, + { + "epoch": 0.1546309729957411, + "loss": 1.5255277156829834, + "loss_ce": 0.005019812844693661, + "loss_iou": 0.65234375, + "loss_num": 0.043701171875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 109402588, + "step": 1652 + }, + { + "epoch": 0.15472457527963682, + "grad_norm": 34.1220817565918, + "learning_rate": 5e-05, + "loss": 1.5569, + "num_input_tokens_seen": 109469492, + "step": 1653 + }, + { + "epoch": 0.15472457527963682, + "loss": 1.609342098236084, + "loss_ce": 0.004849947988986969, + "loss_iou": 0.63671875, + "loss_num": 0.06591796875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 109469492, + "step": 1653 + }, + { + "epoch": 0.15481817756353256, + "grad_norm": 46.722110748291016, + "learning_rate": 5e-05, + "loss": 1.4057, + "num_input_tokens_seen": 109535360, + "step": 1654 + }, + { + "epoch": 0.15481817756353256, + "loss": 1.1857918500900269, + "loss_ce": 0.003174723358824849, + "loss_iou": 0.46484375, + "loss_num": 0.05078125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 109535360, + "step": 1654 + }, + { + "epoch": 0.15491177984742827, + "grad_norm": 21.381216049194336, + "learning_rate": 5e-05, + "loss": 1.443, + "num_input_tokens_seen": 109602452, + "step": 1655 + }, + { + "epoch": 0.15491177984742827, + "loss": 1.32462739944458, + "loss_ce": 0.0018735050689429045, + "loss_iou": 0.59765625, + "loss_num": 0.0262451171875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 109602452, + "step": 1655 + }, + { + "epoch": 0.155005382131324, + "grad_norm": 23.39960479736328, + "learning_rate": 5e-05, + "loss": 1.7426, + "num_input_tokens_seen": 109669016, + "step": 1656 + }, + { + "epoch": 0.155005382131324, + "loss": 1.518391489982605, + "loss_ce": 0.005207954440265894, + "loss_iou": 0.62109375, + "loss_num": 0.0537109375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 109669016, + "step": 1656 + }, + { + "epoch": 0.15509898441521974, + "grad_norm": 413.7605285644531, + "learning_rate": 5e-05, + "loss": 1.3168, + "num_input_tokens_seen": 109735364, + "step": 1657 + }, + { + "epoch": 0.15509898441521974, + "loss": 1.405256748199463, + "loss_ce": 0.0026689593214541674, + "loss_iou": 0.57421875, + "loss_num": 0.05078125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 109735364, + "step": 1657 + }, + { + "epoch": 0.15519258669911545, + "grad_norm": 24.647920608520508, + "learning_rate": 5e-05, + "loss": 1.5683, + "num_input_tokens_seen": 109802096, + "step": 1658 + }, + { + "epoch": 0.15519258669911545, + "loss": 1.470474123954773, + "loss_ce": 0.0046538179740309715, + "loss_iou": 0.5703125, + "loss_num": 0.0654296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 109802096, + "step": 1658 + }, + { + "epoch": 0.1552861889830112, + "grad_norm": 19.028324127197266, + "learning_rate": 5e-05, + "loss": 1.4417, + "num_input_tokens_seen": 109868356, + "step": 1659 + }, + { + "epoch": 0.1552861889830112, + "loss": 1.4208152294158936, + "loss_ce": 0.008217504248023033, + "loss_iou": 0.5859375, + "loss_num": 0.04931640625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 109868356, + "step": 1659 + }, + { + "epoch": 0.1553797912669069, + "grad_norm": 22.659461975097656, + "learning_rate": 5e-05, + "loss": 1.4222, + "num_input_tokens_seen": 109933888, + "step": 1660 + }, + { + "epoch": 0.1553797912669069, + "loss": 1.5754845142364502, + "loss_ce": 0.0022424214985221624, + "loss_iou": 0.66015625, + "loss_num": 0.051513671875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 109933888, + "step": 1660 + }, + { + "epoch": 0.15547339355080264, + "grad_norm": 35.12961196899414, + "learning_rate": 5e-05, + "loss": 1.523, + "num_input_tokens_seen": 109999532, + "step": 1661 + }, + { + "epoch": 0.15547339355080264, + "loss": 1.4882129430770874, + "loss_ce": 0.006279327906668186, + "loss_iou": 0.5703125, + "loss_num": 0.0693359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 109999532, + "step": 1661 + }, + { + "epoch": 0.15556699583469838, + "grad_norm": 27.46817970275879, + "learning_rate": 5e-05, + "loss": 1.7694, + "num_input_tokens_seen": 110066912, + "step": 1662 + }, + { + "epoch": 0.15556699583469838, + "loss": 1.6405129432678223, + "loss_ce": 0.0037942719645798206, + "loss_iou": 0.7109375, + "loss_num": 0.0439453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 110066912, + "step": 1662 + }, + { + "epoch": 0.1556605981185941, + "grad_norm": 21.02796173095703, + "learning_rate": 5e-05, + "loss": 1.4437, + "num_input_tokens_seen": 110133612, + "step": 1663 + }, + { + "epoch": 0.1556605981185941, + "loss": 1.339165210723877, + "loss_ce": 0.0022510988637804985, + "loss_iou": 0.6015625, + "loss_num": 0.026611328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 110133612, + "step": 1663 + }, + { + "epoch": 0.15575420040248983, + "grad_norm": 37.47615051269531, + "learning_rate": 5e-05, + "loss": 1.2583, + "num_input_tokens_seen": 110199372, + "step": 1664 + }, + { + "epoch": 0.15575420040248983, + "loss": 1.3542975187301636, + "loss_ce": 0.009082659147679806, + "loss_iou": 0.5859375, + "loss_num": 0.033935546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 110199372, + "step": 1664 + }, + { + "epoch": 0.15584780268638554, + "grad_norm": 18.19861602783203, + "learning_rate": 5e-05, + "loss": 1.614, + "num_input_tokens_seen": 110264840, + "step": 1665 + }, + { + "epoch": 0.15584780268638554, + "loss": 1.5534906387329102, + "loss_ce": 0.0066156648099422455, + "loss_iou": 0.68359375, + "loss_num": 0.036376953125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 110264840, + "step": 1665 + }, + { + "epoch": 0.15594140497028128, + "grad_norm": 22.97083282470703, + "learning_rate": 5e-05, + "loss": 1.3964, + "num_input_tokens_seen": 110331480, + "step": 1666 + }, + { + "epoch": 0.15594140497028128, + "loss": 1.3985646963119507, + "loss_ce": 0.0074514285661280155, + "loss_iou": 0.62109375, + "loss_num": 0.0301513671875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 110331480, + "step": 1666 + }, + { + "epoch": 0.156035007254177, + "grad_norm": 37.475643157958984, + "learning_rate": 5e-05, + "loss": 1.4204, + "num_input_tokens_seen": 110396916, + "step": 1667 + }, + { + "epoch": 0.156035007254177, + "loss": 1.2760589122772217, + "loss_ce": 0.008114602416753769, + "loss_iou": 0.52734375, + "loss_num": 0.0419921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 110396916, + "step": 1667 + }, + { + "epoch": 0.15612860953807273, + "grad_norm": 18.313720703125, + "learning_rate": 5e-05, + "loss": 1.5083, + "num_input_tokens_seen": 110462724, + "step": 1668 + }, + { + "epoch": 0.15612860953807273, + "loss": 1.5050681829452515, + "loss_ce": 0.006044772453606129, + "loss_iou": 0.5859375, + "loss_num": 0.06591796875, + "loss_xval": 1.5, + "num_input_tokens_seen": 110462724, + "step": 1668 + }, + { + "epoch": 0.15622221182196847, + "grad_norm": 20.74662208557129, + "learning_rate": 5e-05, + "loss": 1.4344, + "num_input_tokens_seen": 110529368, + "step": 1669 + }, + { + "epoch": 0.15622221182196847, + "loss": 1.3420653343200684, + "loss_ce": 0.0041747004725039005, + "loss_iou": 0.56640625, + "loss_num": 0.041259765625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 110529368, + "step": 1669 + }, + { + "epoch": 0.15631581410586418, + "grad_norm": 26.993072509765625, + "learning_rate": 5e-05, + "loss": 1.4631, + "num_input_tokens_seen": 110594948, + "step": 1670 + }, + { + "epoch": 0.15631581410586418, + "loss": 1.4857993125915527, + "loss_ce": 0.0038657882250845432, + "loss_iou": 0.62890625, + "loss_num": 0.04443359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 110594948, + "step": 1670 + }, + { + "epoch": 0.15640941638975991, + "grad_norm": 26.8720760345459, + "learning_rate": 5e-05, + "loss": 1.3475, + "num_input_tokens_seen": 110661728, + "step": 1671 + }, + { + "epoch": 0.15640941638975991, + "loss": 1.3804752826690674, + "loss_ce": 0.004010488279163837, + "loss_iou": 0.625, + "loss_num": 0.0252685546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 110661728, + "step": 1671 + }, + { + "epoch": 0.15650301867365563, + "grad_norm": 18.590309143066406, + "learning_rate": 5e-05, + "loss": 1.584, + "num_input_tokens_seen": 110726932, + "step": 1672 + }, + { + "epoch": 0.15650301867365563, + "loss": 1.5854074954986572, + "loss_ce": 0.009235595352947712, + "loss_iou": 0.671875, + "loss_num": 0.046142578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 110726932, + "step": 1672 + }, + { + "epoch": 0.15659662095755136, + "grad_norm": 16.561054229736328, + "learning_rate": 5e-05, + "loss": 1.137, + "num_input_tokens_seen": 110793776, + "step": 1673 + }, + { + "epoch": 0.15659662095755136, + "loss": 1.1318494081497192, + "loss_ce": 0.0029431560542434454, + "loss_iou": 0.5078125, + "loss_num": 0.0224609375, + "loss_xval": 1.125, + "num_input_tokens_seen": 110793776, + "step": 1673 + }, + { + "epoch": 0.1566902232414471, + "grad_norm": 23.643726348876953, + "learning_rate": 5e-05, + "loss": 1.4951, + "num_input_tokens_seen": 110860072, + "step": 1674 + }, + { + "epoch": 0.1566902232414471, + "loss": 1.722797155380249, + "loss_ce": 0.004047118593007326, + "loss_iou": 0.7109375, + "loss_num": 0.05859375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 110860072, + "step": 1674 + }, + { + "epoch": 0.1567838255253428, + "grad_norm": 39.8719596862793, + "learning_rate": 5e-05, + "loss": 1.4484, + "num_input_tokens_seen": 110926548, + "step": 1675 + }, + { + "epoch": 0.1567838255253428, + "loss": 1.5741641521453857, + "loss_ce": 0.004828257951885462, + "loss_iou": 0.6796875, + "loss_num": 0.0419921875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 110926548, + "step": 1675 + }, + { + "epoch": 0.15687742780923855, + "grad_norm": 16.57904624938965, + "learning_rate": 5e-05, + "loss": 1.7538, + "num_input_tokens_seen": 110993156, + "step": 1676 + }, + { + "epoch": 0.15687742780923855, + "loss": 1.8768911361694336, + "loss_ce": 0.0038443254306912422, + "loss_iou": 0.7890625, + "loss_num": 0.05908203125, + "loss_xval": 1.875, + "num_input_tokens_seen": 110993156, + "step": 1676 + }, + { + "epoch": 0.15697103009313426, + "grad_norm": 19.50870704650879, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 111058216, + "step": 1677 + }, + { + "epoch": 0.15697103009313426, + "loss": 1.0795800685882568, + "loss_ce": 0.004872962832450867, + "loss_iou": 0.447265625, + "loss_num": 0.0361328125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 111058216, + "step": 1677 + }, + { + "epoch": 0.15706463237703, + "grad_norm": 12.743280410766602, + "learning_rate": 5e-05, + "loss": 1.3816, + "num_input_tokens_seen": 111124028, + "step": 1678 + }, + { + "epoch": 0.15706463237703, + "loss": 1.6295316219329834, + "loss_ce": 0.006973108276724815, + "loss_iou": 0.63671875, + "loss_num": 0.06982421875, + "loss_xval": 1.625, + "num_input_tokens_seen": 111124028, + "step": 1678 + }, + { + "epoch": 0.15715823466092574, + "grad_norm": 11.009029388427734, + "learning_rate": 5e-05, + "loss": 1.4818, + "num_input_tokens_seen": 111190800, + "step": 1679 + }, + { + "epoch": 0.15715823466092574, + "loss": 1.6931604146957397, + "loss_ce": 0.009566588327288628, + "loss_iou": 0.6796875, + "loss_num": 0.064453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 111190800, + "step": 1679 + }, + { + "epoch": 0.15725183694482145, + "grad_norm": 14.895833969116211, + "learning_rate": 5e-05, + "loss": 1.4207, + "num_input_tokens_seen": 111257668, + "step": 1680 + }, + { + "epoch": 0.15725183694482145, + "loss": 1.3342525959014893, + "loss_ce": 0.0022212760522961617, + "loss_iou": 0.578125, + "loss_num": 0.03564453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 111257668, + "step": 1680 + }, + { + "epoch": 0.1573454392287172, + "grad_norm": 28.282615661621094, + "learning_rate": 5e-05, + "loss": 1.6942, + "num_input_tokens_seen": 111323136, + "step": 1681 + }, + { + "epoch": 0.1573454392287172, + "loss": 1.8464713096618652, + "loss_ce": 0.005651105660945177, + "loss_iou": 0.69140625, + "loss_num": 0.09228515625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 111323136, + "step": 1681 + }, + { + "epoch": 0.1574390415126129, + "grad_norm": 17.331335067749023, + "learning_rate": 5e-05, + "loss": 1.342, + "num_input_tokens_seen": 111390316, + "step": 1682 + }, + { + "epoch": 0.1574390415126129, + "loss": 1.3260105848312378, + "loss_ce": 0.004233264364302158, + "loss_iou": 0.53125, + "loss_num": 0.051513671875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 111390316, + "step": 1682 + }, + { + "epoch": 0.15753264379650864, + "grad_norm": 21.25231170654297, + "learning_rate": 5e-05, + "loss": 1.3749, + "num_input_tokens_seen": 111455756, + "step": 1683 + }, + { + "epoch": 0.15753264379650864, + "loss": 1.3411973714828491, + "loss_ce": 0.0042833369225263596, + "loss_iou": 0.5703125, + "loss_num": 0.039794921875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 111455756, + "step": 1683 + }, + { + "epoch": 0.15762624608040438, + "grad_norm": 27.89680290222168, + "learning_rate": 5e-05, + "loss": 1.51, + "num_input_tokens_seen": 111521896, + "step": 1684 + }, + { + "epoch": 0.15762624608040438, + "loss": 1.5295137166976929, + "loss_ce": 0.006076245103031397, + "loss_iou": 0.61328125, + "loss_num": 0.05859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 111521896, + "step": 1684 + }, + { + "epoch": 0.15771984836430009, + "grad_norm": 32.397132873535156, + "learning_rate": 5e-05, + "loss": 1.4896, + "num_input_tokens_seen": 111588764, + "step": 1685 + }, + { + "epoch": 0.15771984836430009, + "loss": 1.2692574262619019, + "loss_ce": 0.007538630161434412, + "loss_iou": 0.5703125, + "loss_num": 0.0247802734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 111588764, + "step": 1685 + }, + { + "epoch": 0.15781345064819582, + "grad_norm": 19.623626708984375, + "learning_rate": 5e-05, + "loss": 1.668, + "num_input_tokens_seen": 111655256, + "step": 1686 + }, + { + "epoch": 0.15781345064819582, + "loss": 1.6317683458328247, + "loss_ce": 0.0021296862978488207, + "loss_iou": 0.6953125, + "loss_num": 0.04833984375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 111655256, + "step": 1686 + }, + { + "epoch": 0.15790705293209154, + "grad_norm": 20.810579299926758, + "learning_rate": 5e-05, + "loss": 1.516, + "num_input_tokens_seen": 111721760, + "step": 1687 + }, + { + "epoch": 0.15790705293209154, + "loss": 1.3997474908828735, + "loss_ce": 0.0061928387731313705, + "loss_iou": 0.625, + "loss_num": 0.0289306640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 111721760, + "step": 1687 + }, + { + "epoch": 0.15800065521598727, + "grad_norm": 19.822107315063477, + "learning_rate": 5e-05, + "loss": 1.2058, + "num_input_tokens_seen": 111789768, + "step": 1688 + }, + { + "epoch": 0.15800065521598727, + "loss": 1.2267491817474365, + "loss_ce": 0.00555785745382309, + "loss_iou": 0.56640625, + "loss_num": 0.0177001953125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 111789768, + "step": 1688 + }, + { + "epoch": 0.15809425749988298, + "grad_norm": 19.842737197875977, + "learning_rate": 5e-05, + "loss": 1.4674, + "num_input_tokens_seen": 111856788, + "step": 1689 + }, + { + "epoch": 0.15809425749988298, + "loss": 1.2636029720306396, + "loss_ce": 0.0018842138815671206, + "loss_iou": 0.53515625, + "loss_num": 0.0390625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 111856788, + "step": 1689 + }, + { + "epoch": 0.15818785978377872, + "grad_norm": 26.088294982910156, + "learning_rate": 5e-05, + "loss": 1.0922, + "num_input_tokens_seen": 111922244, + "step": 1690 + }, + { + "epoch": 0.15818785978377872, + "loss": 1.1618432998657227, + "loss_ce": 0.0031518477480858564, + "loss_iou": 0.49609375, + "loss_num": 0.033447265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 111922244, + "step": 1690 + }, + { + "epoch": 0.15828146206767446, + "grad_norm": 19.719554901123047, + "learning_rate": 5e-05, + "loss": 1.5984, + "num_input_tokens_seen": 111987568, + "step": 1691 + }, + { + "epoch": 0.15828146206767446, + "loss": 1.5319783687591553, + "loss_ce": 0.004817795939743519, + "loss_iou": 0.6484375, + "loss_num": 0.04638671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 111987568, + "step": 1691 + }, + { + "epoch": 0.15837506435157017, + "grad_norm": 22.947669982910156, + "learning_rate": 5e-05, + "loss": 1.1232, + "num_input_tokens_seen": 112053276, + "step": 1692 + }, + { + "epoch": 0.15837506435157017, + "loss": 1.0806947946548462, + "loss_ce": 0.004522897768765688, + "loss_iou": 0.4609375, + "loss_num": 0.0303955078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 112053276, + "step": 1692 + }, + { + "epoch": 0.1584686666354659, + "grad_norm": 22.45537567138672, + "learning_rate": 5e-05, + "loss": 1.6815, + "num_input_tokens_seen": 112119604, + "step": 1693 + }, + { + "epoch": 0.1584686666354659, + "loss": 1.701782464981079, + "loss_ce": 0.004516840912401676, + "loss_iou": 0.69921875, + "loss_num": 0.05908203125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 112119604, + "step": 1693 + }, + { + "epoch": 0.15856226891936162, + "grad_norm": 43.1389274597168, + "learning_rate": 5e-05, + "loss": 1.572, + "num_input_tokens_seen": 112185800, + "step": 1694 + }, + { + "epoch": 0.15856226891936162, + "loss": 1.6698806285858154, + "loss_ce": 0.007771380711346865, + "loss_iou": 0.6875, + "loss_num": 0.0576171875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 112185800, + "step": 1694 + }, + { + "epoch": 0.15865587120325736, + "grad_norm": 18.637434005737305, + "learning_rate": 5e-05, + "loss": 1.7675, + "num_input_tokens_seen": 112252176, + "step": 1695 + }, + { + "epoch": 0.15865587120325736, + "loss": 1.7455856800079346, + "loss_ce": 0.0033982768654823303, + "loss_iou": 0.734375, + "loss_num": 0.0556640625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 112252176, + "step": 1695 + }, + { + "epoch": 0.1587494734871531, + "grad_norm": 18.56271743774414, + "learning_rate": 5e-05, + "loss": 1.4154, + "num_input_tokens_seen": 112319724, + "step": 1696 + }, + { + "epoch": 0.1587494734871531, + "loss": 1.4192160367965698, + "loss_ce": 0.0022238276433199644, + "loss_iou": 0.58984375, + "loss_num": 0.0478515625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 112319724, + "step": 1696 + }, + { + "epoch": 0.1588430757710488, + "grad_norm": 22.364852905273438, + "learning_rate": 5e-05, + "loss": 1.4672, + "num_input_tokens_seen": 112385784, + "step": 1697 + }, + { + "epoch": 0.1588430757710488, + "loss": 1.256899118423462, + "loss_ce": 0.004335670731961727, + "loss_iou": 0.53125, + "loss_num": 0.038330078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 112385784, + "step": 1697 + }, + { + "epoch": 0.15893667805494455, + "grad_norm": 37.38805389404297, + "learning_rate": 5e-05, + "loss": 1.8337, + "num_input_tokens_seen": 112452792, + "step": 1698 + }, + { + "epoch": 0.15893667805494455, + "loss": 1.7924762964248657, + "loss_ce": 0.003413844620808959, + "loss_iou": 0.7734375, + "loss_num": 0.048828125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 112452792, + "step": 1698 + }, + { + "epoch": 0.15903028033884026, + "grad_norm": 21.084718704223633, + "learning_rate": 5e-05, + "loss": 1.5781, + "num_input_tokens_seen": 112519992, + "step": 1699 + }, + { + "epoch": 0.15903028033884026, + "loss": 1.5638010501861572, + "loss_ce": 0.007160455919802189, + "loss_iou": 0.671875, + "loss_num": 0.04248046875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 112519992, + "step": 1699 + }, + { + "epoch": 0.159123882622736, + "grad_norm": 20.720853805541992, + "learning_rate": 5e-05, + "loss": 1.3595, + "num_input_tokens_seen": 112585208, + "step": 1700 + }, + { + "epoch": 0.159123882622736, + "loss": 1.4855835437774658, + "loss_ce": 0.004138189367949963, + "loss_iou": 0.6171875, + "loss_num": 0.04931640625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 112585208, + "step": 1700 + }, + { + "epoch": 0.15921748490663173, + "grad_norm": 35.28002166748047, + "learning_rate": 5e-05, + "loss": 1.4608, + "num_input_tokens_seen": 112650564, + "step": 1701 + }, + { + "epoch": 0.15921748490663173, + "loss": 1.1329504251480103, + "loss_ce": 0.0030675572343170643, + "loss_iou": 0.490234375, + "loss_num": 0.0301513671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 112650564, + "step": 1701 + }, + { + "epoch": 0.15931108719052745, + "grad_norm": 20.50391387939453, + "learning_rate": 5e-05, + "loss": 1.3924, + "num_input_tokens_seen": 112717524, + "step": 1702 + }, + { + "epoch": 0.15931108719052745, + "loss": 1.4781479835510254, + "loss_ce": 0.005491648800671101, + "loss_iou": 0.65234375, + "loss_num": 0.03369140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 112717524, + "step": 1702 + }, + { + "epoch": 0.15940468947442318, + "grad_norm": 19.996809005737305, + "learning_rate": 5e-05, + "loss": 1.3563, + "num_input_tokens_seen": 112783004, + "step": 1703 + }, + { + "epoch": 0.15940468947442318, + "loss": 1.3293205499649048, + "loss_ce": 0.011449402198195457, + "loss_iou": 0.58203125, + "loss_num": 0.030517578125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 112783004, + "step": 1703 + }, + { + "epoch": 0.1594982917583189, + "grad_norm": 37.05931091308594, + "learning_rate": 5e-05, + "loss": 1.5565, + "num_input_tokens_seen": 112848828, + "step": 1704 + }, + { + "epoch": 0.1594982917583189, + "loss": 1.5927069187164307, + "loss_ce": 0.0016424510395154357, + "loss_iou": 0.69140625, + "loss_num": 0.041748046875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 112848828, + "step": 1704 + }, + { + "epoch": 0.15959189404221463, + "grad_norm": 13.735276222229004, + "learning_rate": 5e-05, + "loss": 1.7309, + "num_input_tokens_seen": 112915824, + "step": 1705 + }, + { + "epoch": 0.15959189404221463, + "loss": 1.5490221977233887, + "loss_ce": 0.007182505913078785, + "loss_iou": 0.65625, + "loss_num": 0.0458984375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 112915824, + "step": 1705 + }, + { + "epoch": 0.15968549632611034, + "grad_norm": 264.939208984375, + "learning_rate": 5e-05, + "loss": 1.7414, + "num_input_tokens_seen": 112982292, + "step": 1706 + }, + { + "epoch": 0.15968549632611034, + "loss": 1.9344041347503662, + "loss_ce": 0.002763570984825492, + "loss_iou": 0.8125, + "loss_num": 0.06201171875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 112982292, + "step": 1706 + }, + { + "epoch": 0.15977909861000608, + "grad_norm": 18.73700714111328, + "learning_rate": 5e-05, + "loss": 1.3528, + "num_input_tokens_seen": 113047952, + "step": 1707 + }, + { + "epoch": 0.15977909861000608, + "loss": 1.340691328048706, + "loss_ce": 0.006218681577593088, + "loss_iou": 0.5546875, + "loss_num": 0.045166015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 113047952, + "step": 1707 + }, + { + "epoch": 0.15987270089390182, + "grad_norm": 20.325817108154297, + "learning_rate": 5e-05, + "loss": 1.4621, + "num_input_tokens_seen": 113113652, + "step": 1708 + }, + { + "epoch": 0.15987270089390182, + "loss": 1.5143709182739258, + "loss_ce": 0.00558188371360302, + "loss_iou": 0.640625, + "loss_num": 0.0458984375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 113113652, + "step": 1708 + }, + { + "epoch": 0.15996630317779753, + "grad_norm": 27.74958038330078, + "learning_rate": 5e-05, + "loss": 1.4734, + "num_input_tokens_seen": 113179736, + "step": 1709 + }, + { + "epoch": 0.15996630317779753, + "loss": 1.6561379432678223, + "loss_ce": 0.0028177243657410145, + "loss_iou": 0.69140625, + "loss_num": 0.053955078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 113179736, + "step": 1709 + }, + { + "epoch": 0.16005990546169327, + "grad_norm": 19.84098243713379, + "learning_rate": 5e-05, + "loss": 1.4291, + "num_input_tokens_seen": 113246760, + "step": 1710 + }, + { + "epoch": 0.16005990546169327, + "loss": 1.3039138317108154, + "loss_ce": 0.00801539421081543, + "loss_iou": 0.54296875, + "loss_num": 0.04150390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 113246760, + "step": 1710 + }, + { + "epoch": 0.16015350774558898, + "grad_norm": 21.781787872314453, + "learning_rate": 5e-05, + "loss": 1.3981, + "num_input_tokens_seen": 113312584, + "step": 1711 + }, + { + "epoch": 0.16015350774558898, + "loss": 1.2843029499053955, + "loss_ce": 0.005219653248786926, + "loss_iou": 0.5390625, + "loss_num": 0.0400390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 113312584, + "step": 1711 + }, + { + "epoch": 0.16024711002948472, + "grad_norm": 37.196868896484375, + "learning_rate": 5e-05, + "loss": 1.5081, + "num_input_tokens_seen": 113378868, + "step": 1712 + }, + { + "epoch": 0.16024711002948472, + "loss": 1.3969182968139648, + "loss_ce": 0.007361485622823238, + "loss_iou": 0.6015625, + "loss_num": 0.037353515625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 113378868, + "step": 1712 + }, + { + "epoch": 0.16034071231338046, + "grad_norm": 18.481407165527344, + "learning_rate": 5e-05, + "loss": 1.9191, + "num_input_tokens_seen": 113444572, + "step": 1713 + }, + { + "epoch": 0.16034071231338046, + "loss": 1.9023736715316772, + "loss_ce": 0.005889302119612694, + "loss_iou": 0.765625, + "loss_num": 0.0732421875, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 113444572, + "step": 1713 + }, + { + "epoch": 0.16043431459727617, + "grad_norm": 17.455732345581055, + "learning_rate": 5e-05, + "loss": 1.5026, + "num_input_tokens_seen": 113510596, + "step": 1714 + }, + { + "epoch": 0.16043431459727617, + "loss": 1.6242791414260864, + "loss_ce": 0.005138522945344448, + "loss_iou": 0.6640625, + "loss_num": 0.058349609375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 113510596, + "step": 1714 + }, + { + "epoch": 0.1605279168811719, + "grad_norm": 16.29709243774414, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 113576824, + "step": 1715 + }, + { + "epoch": 0.1605279168811719, + "loss": 1.4377756118774414, + "loss_ce": 0.0022287005558609962, + "loss_iou": 0.5859375, + "loss_num": 0.05322265625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 113576824, + "step": 1715 + }, + { + "epoch": 0.16062151916506762, + "grad_norm": 24.637439727783203, + "learning_rate": 5e-05, + "loss": 1.3664, + "num_input_tokens_seen": 113643476, + "step": 1716 + }, + { + "epoch": 0.16062151916506762, + "loss": 1.0036041736602783, + "loss_ce": 0.007998719811439514, + "loss_iou": 0.42578125, + "loss_num": 0.028564453125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 113643476, + "step": 1716 + }, + { + "epoch": 0.16071512144896336, + "grad_norm": 22.650537490844727, + "learning_rate": 5e-05, + "loss": 1.8128, + "num_input_tokens_seen": 113708928, + "step": 1717 + }, + { + "epoch": 0.16071512144896336, + "loss": 1.9136106967926025, + "loss_ce": 0.00833725742995739, + "loss_iou": 0.7734375, + "loss_num": 0.0712890625, + "loss_xval": 1.90625, + "num_input_tokens_seen": 113708928, + "step": 1717 + }, + { + "epoch": 0.1608087237328591, + "grad_norm": 24.803722381591797, + "learning_rate": 5e-05, + "loss": 1.3218, + "num_input_tokens_seen": 113774932, + "step": 1718 + }, + { + "epoch": 0.1608087237328591, + "loss": 1.2616074085235596, + "loss_ce": 0.0037948982790112495, + "loss_iou": 0.54296875, + "loss_num": 0.034912109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 113774932, + "step": 1718 + }, + { + "epoch": 0.1609023260167548, + "grad_norm": 33.39313507080078, + "learning_rate": 5e-05, + "loss": 1.5498, + "num_input_tokens_seen": 113841396, + "step": 1719 + }, + { + "epoch": 0.1609023260167548, + "loss": 1.6364741325378418, + "loss_ce": 0.0031732642091810703, + "loss_iou": 0.70703125, + "loss_num": 0.0439453125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 113841396, + "step": 1719 + }, + { + "epoch": 0.16099592830065054, + "grad_norm": 19.37911605834961, + "learning_rate": 5e-05, + "loss": 1.6403, + "num_input_tokens_seen": 113907016, + "step": 1720 + }, + { + "epoch": 0.16099592830065054, + "loss": 1.7368113994598389, + "loss_ce": 0.00731926504522562, + "loss_iou": 0.734375, + "loss_num": 0.05224609375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 113907016, + "step": 1720 + }, + { + "epoch": 0.16108953058454625, + "grad_norm": 15.980087280273438, + "learning_rate": 5e-05, + "loss": 1.5086, + "num_input_tokens_seen": 113973288, + "step": 1721 + }, + { + "epoch": 0.16108953058454625, + "loss": 1.6609373092651367, + "loss_ce": 0.002734190784394741, + "loss_iou": 0.7109375, + "loss_num": 0.046875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 113973288, + "step": 1721 + }, + { + "epoch": 0.161183132868442, + "grad_norm": 16.575654983520508, + "learning_rate": 5e-05, + "loss": 1.3348, + "num_input_tokens_seen": 114039256, + "step": 1722 + }, + { + "epoch": 0.161183132868442, + "loss": 1.1895720958709717, + "loss_ce": 0.009396284818649292, + "loss_iou": 0.5078125, + "loss_num": 0.032470703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 114039256, + "step": 1722 + }, + { + "epoch": 0.16127673515233773, + "grad_norm": 21.366851806640625, + "learning_rate": 5e-05, + "loss": 1.4415, + "num_input_tokens_seen": 114105624, + "step": 1723 + }, + { + "epoch": 0.16127673515233773, + "loss": 1.335167646408081, + "loss_ce": 0.005577889271080494, + "loss_iou": 0.58203125, + "loss_num": 0.033935546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 114105624, + "step": 1723 + }, + { + "epoch": 0.16137033743623344, + "grad_norm": 49.82637023925781, + "learning_rate": 5e-05, + "loss": 1.5265, + "num_input_tokens_seen": 114172212, + "step": 1724 + }, + { + "epoch": 0.16137033743623344, + "loss": 1.6437671184539795, + "loss_ce": 0.00314208772033453, + "loss_iou": 0.69921875, + "loss_num": 0.048095703125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 114172212, + "step": 1724 + }, + { + "epoch": 0.16146393972012918, + "grad_norm": 21.099742889404297, + "learning_rate": 5e-05, + "loss": 1.3256, + "num_input_tokens_seen": 114238672, + "step": 1725 + }, + { + "epoch": 0.16146393972012918, + "loss": 1.3868200778961182, + "loss_ce": 0.00156625104136765, + "loss_iou": 0.60546875, + "loss_num": 0.034912109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 114238672, + "step": 1725 + }, + { + "epoch": 0.1615575420040249, + "grad_norm": 34.11566925048828, + "learning_rate": 5e-05, + "loss": 1.561, + "num_input_tokens_seen": 114304536, + "step": 1726 + }, + { + "epoch": 0.1615575420040249, + "loss": 1.4601449966430664, + "loss_ce": 0.00787458848208189, + "loss_iou": 0.59375, + "loss_num": 0.052978515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 114304536, + "step": 1726 + }, + { + "epoch": 0.16165114428792063, + "grad_norm": 46.72332763671875, + "learning_rate": 5e-05, + "loss": 1.5351, + "num_input_tokens_seen": 114369684, + "step": 1727 + }, + { + "epoch": 0.16165114428792063, + "loss": 1.58083176612854, + "loss_ce": 0.007589599583297968, + "loss_iou": 0.65625, + "loss_num": 0.05224609375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 114369684, + "step": 1727 + }, + { + "epoch": 0.16174474657181634, + "grad_norm": 12.304830551147461, + "learning_rate": 5e-05, + "loss": 1.6069, + "num_input_tokens_seen": 114435812, + "step": 1728 + }, + { + "epoch": 0.16174474657181634, + "loss": 1.6097861528396606, + "loss_ce": 0.005293989088386297, + "loss_iou": 0.65234375, + "loss_num": 0.06005859375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 114435812, + "step": 1728 + }, + { + "epoch": 0.16183834885571208, + "grad_norm": 19.8958683013916, + "learning_rate": 5e-05, + "loss": 1.5483, + "num_input_tokens_seen": 114501696, + "step": 1729 + }, + { + "epoch": 0.16183834885571208, + "loss": 1.3685400485992432, + "loss_ce": 0.0033057434484362602, + "loss_iou": 0.57421875, + "loss_num": 0.043212890625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 114501696, + "step": 1729 + }, + { + "epoch": 0.16193195113960782, + "grad_norm": 30.420421600341797, + "learning_rate": 5e-05, + "loss": 1.5928, + "num_input_tokens_seen": 114565812, + "step": 1730 + }, + { + "epoch": 0.16193195113960782, + "loss": 1.695379614830017, + "loss_ce": 0.005926554091274738, + "loss_iou": 0.62890625, + "loss_num": 0.0859375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 114565812, + "step": 1730 + }, + { + "epoch": 0.16202555342350353, + "grad_norm": 20.304121017456055, + "learning_rate": 5e-05, + "loss": 1.6293, + "num_input_tokens_seen": 114631956, + "step": 1731 + }, + { + "epoch": 0.16202555342350353, + "loss": 1.8273935317993164, + "loss_ce": 0.006104365922510624, + "loss_iou": 0.74609375, + "loss_num": 0.06640625, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 114631956, + "step": 1731 + }, + { + "epoch": 0.16211915570739927, + "grad_norm": 19.808990478515625, + "learning_rate": 5e-05, + "loss": 1.4501, + "num_input_tokens_seen": 114698872, + "step": 1732 + }, + { + "epoch": 0.16211915570739927, + "loss": 1.4610393047332764, + "loss_ce": 0.004008077085018158, + "loss_iou": 0.6015625, + "loss_num": 0.050537109375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 114698872, + "step": 1732 + }, + { + "epoch": 0.16221275799129498, + "grad_norm": 21.50043487548828, + "learning_rate": 5e-05, + "loss": 1.432, + "num_input_tokens_seen": 114764944, + "step": 1733 + }, + { + "epoch": 0.16221275799129498, + "loss": 1.4860564470291138, + "loss_ce": 0.00412286352366209, + "loss_iou": 0.625, + "loss_num": 0.046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 114764944, + "step": 1733 + }, + { + "epoch": 0.16230636027519071, + "grad_norm": 50.31922149658203, + "learning_rate": 5e-05, + "loss": 1.5526, + "num_input_tokens_seen": 114830980, + "step": 1734 + }, + { + "epoch": 0.16230636027519071, + "loss": 1.5092554092407227, + "loss_ce": 0.0068140276707708836, + "loss_iou": 0.58203125, + "loss_num": 0.068359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 114830980, + "step": 1734 + }, + { + "epoch": 0.16239996255908645, + "grad_norm": 31.06298065185547, + "learning_rate": 5e-05, + "loss": 1.5539, + "num_input_tokens_seen": 114896520, + "step": 1735 + }, + { + "epoch": 0.16239996255908645, + "loss": 1.5440752506256104, + "loss_ce": 0.007942529395222664, + "loss_iou": 0.64453125, + "loss_num": 0.050048828125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 114896520, + "step": 1735 + }, + { + "epoch": 0.16249356484298216, + "grad_norm": 22.209434509277344, + "learning_rate": 5e-05, + "loss": 1.5908, + "num_input_tokens_seen": 114962788, + "step": 1736 + }, + { + "epoch": 0.16249356484298216, + "loss": 1.6674981117248535, + "loss_ce": 0.01027146726846695, + "loss_iou": 0.7265625, + "loss_num": 0.04052734375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 114962788, + "step": 1736 + }, + { + "epoch": 0.1625871671268779, + "grad_norm": 26.099212646484375, + "learning_rate": 5e-05, + "loss": 1.7138, + "num_input_tokens_seen": 115028892, + "step": 1737 + }, + { + "epoch": 0.1625871671268779, + "loss": 1.9323307275772095, + "loss_ce": 0.005572822876274586, + "loss_iou": 0.78515625, + "loss_num": 0.0712890625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 115028892, + "step": 1737 + }, + { + "epoch": 0.1626807694107736, + "grad_norm": 23.11241912841797, + "learning_rate": 5e-05, + "loss": 1.6092, + "num_input_tokens_seen": 115094980, + "step": 1738 + }, + { + "epoch": 0.1626807694107736, + "loss": 1.8186943531036377, + "loss_ce": 0.006194377318024635, + "loss_iou": 0.7578125, + "loss_num": 0.058837890625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 115094980, + "step": 1738 + }, + { + "epoch": 0.16277437169466935, + "grad_norm": 15.767333030700684, + "learning_rate": 5e-05, + "loss": 1.3477, + "num_input_tokens_seen": 115160364, + "step": 1739 + }, + { + "epoch": 0.16277437169466935, + "loss": 1.303972840309143, + "loss_ce": 0.004656454082578421, + "loss_iou": 0.55078125, + "loss_num": 0.0400390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 115160364, + "step": 1739 + }, + { + "epoch": 0.1628679739785651, + "grad_norm": 26.839588165283203, + "learning_rate": 5e-05, + "loss": 1.4647, + "num_input_tokens_seen": 115226664, + "step": 1740 + }, + { + "epoch": 0.1628679739785651, + "loss": 1.5073782205581665, + "loss_ce": 0.009331364184617996, + "loss_iou": 0.65625, + "loss_num": 0.036376953125, + "loss_xval": 1.5, + "num_input_tokens_seen": 115226664, + "step": 1740 + }, + { + "epoch": 0.1629615762624608, + "grad_norm": 22.037633895874023, + "learning_rate": 5e-05, + "loss": 1.4477, + "num_input_tokens_seen": 115292240, + "step": 1741 + }, + { + "epoch": 0.1629615762624608, + "loss": 1.5085651874542236, + "loss_ce": 0.005635551176965237, + "loss_iou": 0.640625, + "loss_num": 0.044921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 115292240, + "step": 1741 + }, + { + "epoch": 0.16305517854635654, + "grad_norm": 24.764665603637695, + "learning_rate": 5e-05, + "loss": 1.4892, + "num_input_tokens_seen": 115359484, + "step": 1742 + }, + { + "epoch": 0.16305517854635654, + "loss": 1.4975062608718872, + "loss_ce": 0.0058071063831448555, + "loss_iou": 0.62109375, + "loss_num": 0.050048828125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 115359484, + "step": 1742 + }, + { + "epoch": 0.16314878083025225, + "grad_norm": 30.43408966064453, + "learning_rate": 5e-05, + "loss": 1.3994, + "num_input_tokens_seen": 115424972, + "step": 1743 + }, + { + "epoch": 0.16314878083025225, + "loss": 1.1698040962219238, + "loss_ce": 0.009159505367279053, + "loss_iou": 0.51953125, + "loss_num": 0.0244140625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 115424972, + "step": 1743 + }, + { + "epoch": 0.163242383114148, + "grad_norm": 14.067171096801758, + "learning_rate": 5e-05, + "loss": 1.6832, + "num_input_tokens_seen": 115490808, + "step": 1744 + }, + { + "epoch": 0.163242383114148, + "loss": 1.4237908124923706, + "loss_ce": 0.004845545627176762, + "loss_iou": 0.6171875, + "loss_num": 0.0361328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 115490808, + "step": 1744 + }, + { + "epoch": 0.16333598539804373, + "grad_norm": 25.986255645751953, + "learning_rate": 5e-05, + "loss": 1.4692, + "num_input_tokens_seen": 115555808, + "step": 1745 + }, + { + "epoch": 0.16333598539804373, + "loss": 1.392876386642456, + "loss_ce": 0.006157543044537306, + "loss_iou": 0.57421875, + "loss_num": 0.04736328125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 115555808, + "step": 1745 + }, + { + "epoch": 0.16342958768193944, + "grad_norm": 26.338821411132812, + "learning_rate": 5e-05, + "loss": 1.2838, + "num_input_tokens_seen": 115621296, + "step": 1746 + }, + { + "epoch": 0.16342958768193944, + "loss": 1.0939245223999023, + "loss_ce": 0.00432491535320878, + "loss_iou": 0.470703125, + "loss_num": 0.029296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 115621296, + "step": 1746 + }, + { + "epoch": 0.16352318996583518, + "grad_norm": 18.52241325378418, + "learning_rate": 5e-05, + "loss": 1.4664, + "num_input_tokens_seen": 115688028, + "step": 1747 + }, + { + "epoch": 0.16352318996583518, + "loss": 1.514211654663086, + "loss_ce": 0.005422556772828102, + "loss_iou": 0.61328125, + "loss_num": 0.05615234375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 115688028, + "step": 1747 + }, + { + "epoch": 0.16361679224973089, + "grad_norm": 27.766054153442383, + "learning_rate": 5e-05, + "loss": 1.445, + "num_input_tokens_seen": 115754952, + "step": 1748 + }, + { + "epoch": 0.16361679224973089, + "loss": 1.6001439094543457, + "loss_ce": 0.0029758987948298454, + "loss_iou": 0.6328125, + "loss_num": 0.06689453125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 115754952, + "step": 1748 + }, + { + "epoch": 0.16371039453362662, + "grad_norm": 20.995222091674805, + "learning_rate": 5e-05, + "loss": 1.7868, + "num_input_tokens_seen": 115822616, + "step": 1749 + }, + { + "epoch": 0.16371039453362662, + "loss": 2.139638900756836, + "loss_ce": 0.008779620751738548, + "loss_iou": 0.8671875, + "loss_num": 0.07861328125, + "loss_xval": 2.125, + "num_input_tokens_seen": 115822616, + "step": 1749 + }, + { + "epoch": 0.16380399681752233, + "grad_norm": 36.072669982910156, + "learning_rate": 5e-05, + "loss": 1.4286, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "eval_seeclick_CIoU": 0.13988454267382622, + "eval_seeclick_GIoU": 0.1664675995707512, + "eval_seeclick_IoU": 0.26141196489334106, + "eval_seeclick_MAE_all": 0.15366214513778687, + "eval_seeclick_MAE_h": 0.11682414263486862, + "eval_seeclick_MAE_w": 0.11442156881093979, + "eval_seeclick_MAE_x_boxes": 0.18956541270017624, + "eval_seeclick_MAE_y_boxes": 0.12460571527481079, + "eval_seeclick_NUM_probability": 0.9996358752250671, + "eval_seeclick_inside_bbox": 0.35625000298023224, + "eval_seeclick_loss": 2.53466796875, + "eval_seeclick_loss_ce": 0.01495735952630639, + "eval_seeclick_loss_iou": 0.87060546875, + "eval_seeclick_loss_num": 0.1603240966796875, + "eval_seeclick_loss_xval": 2.54296875, + "eval_seeclick_runtime": 61.5896, + "eval_seeclick_samples_per_second": 0.763, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "eval_icons_CIoU": -0.04583752155303955, + "eval_icons_GIoU": 0.0219585164450109, + "eval_icons_IoU": 0.10735082998871803, + "eval_icons_MAE_all": 0.13731835782527924, + "eval_icons_MAE_h": 0.13323041796684265, + "eval_icons_MAE_w": 0.12281985208392143, + "eval_icons_MAE_x_boxes": 0.09434624761343002, + "eval_icons_MAE_y_boxes": 0.09666785225272179, + "eval_icons_NUM_probability": 0.999868243932724, + "eval_icons_inside_bbox": 0.1493055559694767, + "eval_icons_loss": 2.6144890785217285, + "eval_icons_loss_ce": 0.00014377359912032261, + "eval_icons_loss_iou": 0.982177734375, + "eval_icons_loss_num": 0.1362457275390625, + "eval_icons_loss_xval": 2.64501953125, + "eval_icons_runtime": 64.0511, + "eval_icons_samples_per_second": 0.781, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "eval_screenspot_CIoU": -0.0028064359600345292, + "eval_screenspot_GIoU": 0.02295659513523181, + "eval_screenspot_IoU": 0.16487163801987967, + "eval_screenspot_MAE_all": 0.19828185935815176, + "eval_screenspot_MAE_h": 0.19914381702740988, + "eval_screenspot_MAE_w": 0.192783792813619, + "eval_screenspot_MAE_x_boxes": 0.18208610514799753, + "eval_screenspot_MAE_y_boxes": 0.12487444778283437, + "eval_screenspot_NUM_probability": 0.9997270703315735, + "eval_screenspot_inside_bbox": 0.3462499976158142, + "eval_screenspot_loss": 2.9908530712127686, + "eval_screenspot_loss_ce": 0.009655209258198738, + "eval_screenspot_loss_iou": 0.9970703125, + "eval_screenspot_loss_num": 0.20709228515625, + "eval_screenspot_loss_xval": 3.0305989583333335, + "eval_screenspot_runtime": 111.6207, + "eval_screenspot_samples_per_second": 0.797, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "eval_compot_CIoU": -0.06205618567764759, + "eval_compot_GIoU": -0.04428771324455738, + "eval_compot_IoU": 0.10950541123747826, + "eval_compot_MAE_all": 0.17187052220106125, + "eval_compot_MAE_h": 0.14416931197047234, + "eval_compot_MAE_w": 0.15638110414147377, + "eval_compot_MAE_x_boxes": 0.15225274860858917, + "eval_compot_MAE_y_boxes": 0.12462204694747925, + "eval_compot_NUM_probability": 0.9998358190059662, + "eval_compot_inside_bbox": 0.2447916716337204, + "eval_compot_loss": 3.0344960689544678, + "eval_compot_loss_ce": 0.005371739389374852, + "eval_compot_loss_iou": 1.078125, + "eval_compot_loss_num": 0.19002532958984375, + "eval_compot_loss_xval": 3.107421875, + "eval_compot_runtime": 66.3066, + "eval_compot_samples_per_second": 0.754, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "eval_custom_ui_MAE_all": 0.13191691786050797, + "eval_custom_ui_MAE_x": 0.12669065594673157, + "eval_custom_ui_MAE_y": 0.13714319840073586, + "eval_custom_ui_NUM_probability": 0.9998179078102112, + "eval_custom_ui_loss": 0.760617196559906, + "eval_custom_ui_loss_ce": 0.12496945261955261, + "eval_custom_ui_loss_num": 0.1357574462890625, + "eval_custom_ui_loss_xval": 0.6785888671875, + "eval_custom_ui_runtime": 50.361, + "eval_custom_ui_samples_per_second": 0.993, + "eval_custom_ui_steps_per_second": 0.04, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16380399681752233, + "loss": 0.8060125112533569, + "loss_ce": 0.14121757447719574, + "loss_iou": 0.0, + "loss_num": 0.1328125, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 115888940, + "step": 1750 + }, + { + "epoch": 0.16389759910141807, + "grad_norm": 22.022716522216797, + "learning_rate": 5e-05, + "loss": 1.4951, + "num_input_tokens_seen": 115954508, + "step": 1751 + }, + { + "epoch": 0.16389759910141807, + "loss": 1.6610054969787598, + "loss_ce": 0.002802323317155242, + "loss_iou": 0.70703125, + "loss_num": 0.04931640625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 115954508, + "step": 1751 + }, + { + "epoch": 0.1639912013853138, + "grad_norm": 26.0401554107666, + "learning_rate": 5e-05, + "loss": 1.3329, + "num_input_tokens_seen": 116021228, + "step": 1752 + }, + { + "epoch": 0.1639912013853138, + "loss": 1.2882084846496582, + "loss_ce": 0.0010990931186825037, + "loss_iou": 0.5390625, + "loss_num": 0.041748046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 116021228, + "step": 1752 + }, + { + "epoch": 0.16408480366920952, + "grad_norm": 21.764360427856445, + "learning_rate": 5e-05, + "loss": 1.8411, + "num_input_tokens_seen": 116087360, + "step": 1753 + }, + { + "epoch": 0.16408480366920952, + "loss": 1.779345154762268, + "loss_ce": 0.006884216796606779, + "loss_iou": 0.7109375, + "loss_num": 0.06982421875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 116087360, + "step": 1753 + }, + { + "epoch": 0.16417840595310526, + "grad_norm": 14.626240730285645, + "learning_rate": 5e-05, + "loss": 1.2688, + "num_input_tokens_seen": 116155684, + "step": 1754 + }, + { + "epoch": 0.16417840595310526, + "loss": 1.3500797748565674, + "loss_ce": 0.004864873364567757, + "loss_iou": 0.578125, + "loss_num": 0.038330078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 116155684, + "step": 1754 + }, + { + "epoch": 0.16427200823700097, + "grad_norm": 55.89632797241211, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 116221772, + "step": 1755 + }, + { + "epoch": 0.16427200823700097, + "loss": 1.161569356918335, + "loss_ce": 0.006723121739923954, + "loss_iou": 0.5, + "loss_num": 0.0303955078125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 116221772, + "step": 1755 + }, + { + "epoch": 0.1643656105208967, + "grad_norm": 36.48343276977539, + "learning_rate": 5e-05, + "loss": 1.3916, + "num_input_tokens_seen": 116288536, + "step": 1756 + }, + { + "epoch": 0.1643656105208967, + "loss": 1.2644689083099365, + "loss_ce": 0.004703253507614136, + "loss_iou": 0.578125, + "loss_num": 0.0198974609375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 116288536, + "step": 1756 + }, + { + "epoch": 0.16445921280479245, + "grad_norm": 22.66897201538086, + "learning_rate": 5e-05, + "loss": 1.776, + "num_input_tokens_seen": 116354792, + "step": 1757 + }, + { + "epoch": 0.16445921280479245, + "loss": 1.6933791637420654, + "loss_ce": 0.005299382843077183, + "loss_iou": 0.6953125, + "loss_num": 0.06005859375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 116354792, + "step": 1757 + }, + { + "epoch": 0.16455281508868816, + "grad_norm": 22.034452438354492, + "learning_rate": 5e-05, + "loss": 1.367, + "num_input_tokens_seen": 116421524, + "step": 1758 + }, + { + "epoch": 0.16455281508868816, + "loss": 1.366721272468567, + "loss_ce": 0.005393100902438164, + "loss_iou": 0.58203125, + "loss_num": 0.03857421875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 116421524, + "step": 1758 + }, + { + "epoch": 0.1646464173725839, + "grad_norm": 42.0161247253418, + "learning_rate": 5e-05, + "loss": 1.348, + "num_input_tokens_seen": 116487824, + "step": 1759 + }, + { + "epoch": 0.1646464173725839, + "loss": 1.3555519580841064, + "loss_ce": 0.007895732298493385, + "loss_iou": 0.61328125, + "loss_num": 0.02490234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 116487824, + "step": 1759 + }, + { + "epoch": 0.1647400196564796, + "grad_norm": 26.5911865234375, + "learning_rate": 5e-05, + "loss": 1.5691, + "num_input_tokens_seen": 116554052, + "step": 1760 + }, + { + "epoch": 0.1647400196564796, + "loss": 1.5571060180664062, + "loss_ce": 0.0048599690198898315, + "loss_iou": 0.671875, + "loss_num": 0.041015625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 116554052, + "step": 1760 + }, + { + "epoch": 0.16483362194037535, + "grad_norm": 195.60792541503906, + "learning_rate": 5e-05, + "loss": 1.4447, + "num_input_tokens_seen": 116619608, + "step": 1761 + }, + { + "epoch": 0.16483362194037535, + "loss": 1.2059764862060547, + "loss_ce": 0.0038279893342405558, + "loss_iou": 0.51171875, + "loss_num": 0.0361328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 116619608, + "step": 1761 + }, + { + "epoch": 0.16492722422427109, + "grad_norm": 19.173919677734375, + "learning_rate": 5e-05, + "loss": 1.3819, + "num_input_tokens_seen": 116685620, + "step": 1762 + }, + { + "epoch": 0.16492722422427109, + "loss": 1.4086960554122925, + "loss_ce": 0.0034227129071950912, + "loss_iou": 0.578125, + "loss_num": 0.050048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 116685620, + "step": 1762 + }, + { + "epoch": 0.1650208265081668, + "grad_norm": 27.1180477142334, + "learning_rate": 5e-05, + "loss": 1.2627, + "num_input_tokens_seen": 116751244, + "step": 1763 + }, + { + "epoch": 0.1650208265081668, + "loss": 1.0484132766723633, + "loss_ce": 0.002881082706153393, + "loss_iou": 0.458984375, + "loss_num": 0.0255126953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 116751244, + "step": 1763 + }, + { + "epoch": 0.16511442879206253, + "grad_norm": 30.75033950805664, + "learning_rate": 5e-05, + "loss": 1.3692, + "num_input_tokens_seen": 116818284, + "step": 1764 + }, + { + "epoch": 0.16511442879206253, + "loss": 1.188355803489685, + "loss_ce": 0.005250256508588791, + "loss_iou": 0.51953125, + "loss_num": 0.0286865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 116818284, + "step": 1764 + }, + { + "epoch": 0.16520803107595824, + "grad_norm": 24.517242431640625, + "learning_rate": 5e-05, + "loss": 1.6151, + "num_input_tokens_seen": 116884392, + "step": 1765 + }, + { + "epoch": 0.16520803107595824, + "loss": 1.4741705656051636, + "loss_ce": 0.002490849932655692, + "loss_iou": 0.625, + "loss_num": 0.0439453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 116884392, + "step": 1765 + }, + { + "epoch": 0.16530163335985398, + "grad_norm": 24.159650802612305, + "learning_rate": 5e-05, + "loss": 1.3617, + "num_input_tokens_seen": 116950292, + "step": 1766 + }, + { + "epoch": 0.16530163335985398, + "loss": 1.3831593990325928, + "loss_ce": 0.006206326186656952, + "loss_iou": 0.61328125, + "loss_num": 0.02978515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 116950292, + "step": 1766 + }, + { + "epoch": 0.1653952356437497, + "grad_norm": 20.05497932434082, + "learning_rate": 5e-05, + "loss": 1.6595, + "num_input_tokens_seen": 117018208, + "step": 1767 + }, + { + "epoch": 0.1653952356437497, + "loss": 1.487356185913086, + "loss_ce": 0.002981242025271058, + "loss_iou": 0.640625, + "loss_num": 0.040283203125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 117018208, + "step": 1767 + }, + { + "epoch": 0.16548883792764543, + "grad_norm": 22.533309936523438, + "learning_rate": 5e-05, + "loss": 1.4024, + "num_input_tokens_seen": 117084768, + "step": 1768 + }, + { + "epoch": 0.16548883792764543, + "loss": 1.3510992527008057, + "loss_ce": 0.00734925689175725, + "loss_iou": 0.62890625, + "loss_num": 0.0174560546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 117084768, + "step": 1768 + }, + { + "epoch": 0.16558244021154117, + "grad_norm": 18.84259605407715, + "learning_rate": 5e-05, + "loss": 1.6004, + "num_input_tokens_seen": 117151304, + "step": 1769 + }, + { + "epoch": 0.16558244021154117, + "loss": 1.465224266052246, + "loss_ce": 0.004774982109665871, + "loss_iou": 0.640625, + "loss_num": 0.035400390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 117151304, + "step": 1769 + }, + { + "epoch": 0.16567604249543688, + "grad_norm": 16.174936294555664, + "learning_rate": 5e-05, + "loss": 1.4084, + "num_input_tokens_seen": 117217136, + "step": 1770 + }, + { + "epoch": 0.16567604249543688, + "loss": 1.5225234031677246, + "loss_ce": 0.006898357067257166, + "loss_iou": 0.640625, + "loss_num": 0.046630859375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 117217136, + "step": 1770 + }, + { + "epoch": 0.16576964477933262, + "grad_norm": 22.07652473449707, + "learning_rate": 5e-05, + "loss": 1.3213, + "num_input_tokens_seen": 117284092, + "step": 1771 + }, + { + "epoch": 0.16576964477933262, + "loss": 1.464104413986206, + "loss_ce": 0.008538098074495792, + "loss_iou": 0.625, + "loss_num": 0.041259765625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 117284092, + "step": 1771 + }, + { + "epoch": 0.16586324706322833, + "grad_norm": 33.298805236816406, + "learning_rate": 5e-05, + "loss": 1.3119, + "num_input_tokens_seen": 117350076, + "step": 1772 + }, + { + "epoch": 0.16586324706322833, + "loss": 1.1476900577545166, + "loss_ce": 0.0020602052100002766, + "loss_iou": 0.482421875, + "loss_num": 0.035888671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 117350076, + "step": 1772 + }, + { + "epoch": 0.16595684934712407, + "grad_norm": 18.937501907348633, + "learning_rate": 5e-05, + "loss": 1.716, + "num_input_tokens_seen": 117416696, + "step": 1773 + }, + { + "epoch": 0.16595684934712407, + "loss": 1.636894941329956, + "loss_ce": 0.0031060222536325455, + "loss_iou": 0.71875, + "loss_num": 0.038818359375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 117416696, + "step": 1773 + }, + { + "epoch": 0.1660504516310198, + "grad_norm": 15.270478248596191, + "learning_rate": 5e-05, + "loss": 1.4745, + "num_input_tokens_seen": 117483156, + "step": 1774 + }, + { + "epoch": 0.1660504516310198, + "loss": 1.3174545764923096, + "loss_ce": 0.0015367262531071901, + "loss_iou": 0.5859375, + "loss_num": 0.0284423828125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 117483156, + "step": 1774 + }, + { + "epoch": 0.16614405391491552, + "grad_norm": 26.782865524291992, + "learning_rate": 5e-05, + "loss": 1.3052, + "num_input_tokens_seen": 117548936, + "step": 1775 + }, + { + "epoch": 0.16614405391491552, + "loss": 1.3338900804519653, + "loss_ce": 0.005276761949062347, + "loss_iou": 0.56640625, + "loss_num": 0.038818359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 117548936, + "step": 1775 + }, + { + "epoch": 0.16623765619881126, + "grad_norm": 27.9973087310791, + "learning_rate": 5e-05, + "loss": 1.6991, + "num_input_tokens_seen": 117614620, + "step": 1776 + }, + { + "epoch": 0.16623765619881126, + "loss": 1.6971447467803955, + "loss_ce": 0.004273728467524052, + "loss_iou": 0.74609375, + "loss_num": 0.040771484375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 117614620, + "step": 1776 + }, + { + "epoch": 0.16633125848270697, + "grad_norm": 18.598751068115234, + "learning_rate": 5e-05, + "loss": 1.3747, + "num_input_tokens_seen": 117681268, + "step": 1777 + }, + { + "epoch": 0.16633125848270697, + "loss": 1.3994638919830322, + "loss_ce": 0.003956090193241835, + "loss_iou": 0.62109375, + "loss_num": 0.031005859375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 117681268, + "step": 1777 + }, + { + "epoch": 0.1664248607666027, + "grad_norm": 20.475177764892578, + "learning_rate": 5e-05, + "loss": 1.4817, + "num_input_tokens_seen": 117747892, + "step": 1778 + }, + { + "epoch": 0.1664248607666027, + "loss": 1.3213424682617188, + "loss_ce": 0.002494772197678685, + "loss_iou": 0.5234375, + "loss_num": 0.05419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 117747892, + "step": 1778 + }, + { + "epoch": 0.16651846305049844, + "grad_norm": 41.40374755859375, + "learning_rate": 5e-05, + "loss": 1.6044, + "num_input_tokens_seen": 117813716, + "step": 1779 + }, + { + "epoch": 0.16651846305049844, + "loss": 1.6195666790008545, + "loss_ce": 0.006285447161644697, + "loss_iou": 0.6796875, + "loss_num": 0.050537109375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 117813716, + "step": 1779 + }, + { + "epoch": 0.16661206533439415, + "grad_norm": 17.386091232299805, + "learning_rate": 5e-05, + "loss": 1.7843, + "num_input_tokens_seen": 117880480, + "step": 1780 + }, + { + "epoch": 0.16661206533439415, + "loss": 2.0016939640045166, + "loss_ce": 0.008529771119356155, + "loss_iou": 0.8515625, + "loss_num": 0.057373046875, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 117880480, + "step": 1780 + }, + { + "epoch": 0.1667056676182899, + "grad_norm": 95.06216430664062, + "learning_rate": 5e-05, + "loss": 1.634, + "num_input_tokens_seen": 117946152, + "step": 1781 + }, + { + "epoch": 0.1667056676182899, + "loss": 1.7567811012268066, + "loss_ce": 0.009710765443742275, + "loss_iou": 0.71484375, + "loss_num": 0.0625, + "loss_xval": 1.75, + "num_input_tokens_seen": 117946152, + "step": 1781 + }, + { + "epoch": 0.1667992699021856, + "grad_norm": 23.291643142700195, + "learning_rate": 5e-05, + "loss": 1.1976, + "num_input_tokens_seen": 118012344, + "step": 1782 + }, + { + "epoch": 0.1667992699021856, + "loss": 1.3272414207458496, + "loss_ce": 0.002046172972768545, + "loss_iou": 0.56640625, + "loss_num": 0.038818359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 118012344, + "step": 1782 + }, + { + "epoch": 0.16689287218608134, + "grad_norm": 22.385786056518555, + "learning_rate": 5e-05, + "loss": 1.7416, + "num_input_tokens_seen": 118078248, + "step": 1783 + }, + { + "epoch": 0.16689287218608134, + "loss": 1.5646185874938965, + "loss_ce": 0.001141960616223514, + "loss_iou": 0.6953125, + "loss_num": 0.03515625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 118078248, + "step": 1783 + }, + { + "epoch": 0.16698647446997708, + "grad_norm": 17.838022232055664, + "learning_rate": 5e-05, + "loss": 1.2839, + "num_input_tokens_seen": 118144696, + "step": 1784 + }, + { + "epoch": 0.16698647446997708, + "loss": 1.2266840934753418, + "loss_ce": 0.004577185958623886, + "loss_iou": 0.5, + "loss_num": 0.0439453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 118144696, + "step": 1784 + }, + { + "epoch": 0.1670800767538728, + "grad_norm": 24.808393478393555, + "learning_rate": 5e-05, + "loss": 1.6966, + "num_input_tokens_seen": 118211268, + "step": 1785 + }, + { + "epoch": 0.1670800767538728, + "loss": 1.9299710988998413, + "loss_ce": 0.0012601525522768497, + "loss_iou": 0.8203125, + "loss_num": 0.057373046875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 118211268, + "step": 1785 + }, + { + "epoch": 0.16717367903776853, + "grad_norm": 42.57669448852539, + "learning_rate": 5e-05, + "loss": 1.6544, + "num_input_tokens_seen": 118276872, + "step": 1786 + }, + { + "epoch": 0.16717367903776853, + "loss": 1.6943639516830444, + "loss_ce": 0.004910839721560478, + "loss_iou": 0.75, + "loss_num": 0.037353515625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 118276872, + "step": 1786 + }, + { + "epoch": 0.16726728132166424, + "grad_norm": 21.89909553527832, + "learning_rate": 5e-05, + "loss": 1.8305, + "num_input_tokens_seen": 118343504, + "step": 1787 + }, + { + "epoch": 0.16726728132166424, + "loss": 1.8874485492706299, + "loss_ce": 0.011472001671791077, + "loss_iou": 0.82421875, + "loss_num": 0.04541015625, + "loss_xval": 1.875, + "num_input_tokens_seen": 118343504, + "step": 1787 + }, + { + "epoch": 0.16736088360555998, + "grad_norm": 18.595476150512695, + "learning_rate": 5e-05, + "loss": 1.6608, + "num_input_tokens_seen": 118410004, + "step": 1788 + }, + { + "epoch": 0.16736088360555998, + "loss": 1.6199846267700195, + "loss_ce": 0.01012139581143856, + "loss_iou": 0.66015625, + "loss_num": 0.057861328125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 118410004, + "step": 1788 + }, + { + "epoch": 0.1674544858894557, + "grad_norm": 37.87059020996094, + "learning_rate": 5e-05, + "loss": 1.3597, + "num_input_tokens_seen": 118476512, + "step": 1789 + }, + { + "epoch": 0.1674544858894557, + "loss": 1.4484469890594482, + "loss_ce": 0.005087633151561022, + "loss_iou": 0.58203125, + "loss_num": 0.056396484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 118476512, + "step": 1789 + }, + { + "epoch": 0.16754808817335143, + "grad_norm": 17.019468307495117, + "learning_rate": 5e-05, + "loss": 1.6132, + "num_input_tokens_seen": 118542992, + "step": 1790 + }, + { + "epoch": 0.16754808817335143, + "loss": 1.6254267692565918, + "loss_ce": 0.0033564118202775717, + "loss_iou": 0.65234375, + "loss_num": 0.06298828125, + "loss_xval": 1.625, + "num_input_tokens_seen": 118542992, + "step": 1790 + }, + { + "epoch": 0.16764169045724717, + "grad_norm": 15.186246871948242, + "learning_rate": 5e-05, + "loss": 1.3601, + "num_input_tokens_seen": 118610108, + "step": 1791 + }, + { + "epoch": 0.16764169045724717, + "loss": 1.2531754970550537, + "loss_ce": 0.005128608085215092, + "loss_iou": 0.5390625, + "loss_num": 0.033935546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 118610108, + "step": 1791 + }, + { + "epoch": 0.16773529274114288, + "grad_norm": 21.040952682495117, + "learning_rate": 5e-05, + "loss": 1.5555, + "num_input_tokens_seen": 118676632, + "step": 1792 + }, + { + "epoch": 0.16773529274114288, + "loss": 1.5859944820404053, + "loss_ce": 0.00396324135363102, + "loss_iou": 0.640625, + "loss_num": 0.060546875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 118676632, + "step": 1792 + }, + { + "epoch": 0.16782889502503862, + "grad_norm": 18.22553062438965, + "learning_rate": 5e-05, + "loss": 1.3909, + "num_input_tokens_seen": 118743228, + "step": 1793 + }, + { + "epoch": 0.16782889502503862, + "loss": 1.3683271408081055, + "loss_ce": 0.0035810484550893307, + "loss_iou": 0.578125, + "loss_num": 0.041748046875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 118743228, + "step": 1793 + }, + { + "epoch": 0.16792249730893433, + "grad_norm": 19.60796356201172, + "learning_rate": 5e-05, + "loss": 1.3766, + "num_input_tokens_seen": 118809676, + "step": 1794 + }, + { + "epoch": 0.16792249730893433, + "loss": 1.3190234899520874, + "loss_ce": 0.004570364952087402, + "loss_iou": 0.578125, + "loss_num": 0.031982421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 118809676, + "step": 1794 + }, + { + "epoch": 0.16801609959283006, + "grad_norm": 23.017669677734375, + "learning_rate": 5e-05, + "loss": 1.2183, + "num_input_tokens_seen": 118875244, + "step": 1795 + }, + { + "epoch": 0.16801609959283006, + "loss": 1.1294887065887451, + "loss_ce": 0.012301236391067505, + "loss_iou": 0.470703125, + "loss_num": 0.034912109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 118875244, + "step": 1795 + }, + { + "epoch": 0.1681097018767258, + "grad_norm": 24.088245391845703, + "learning_rate": 5e-05, + "loss": 1.5871, + "num_input_tokens_seen": 118940184, + "step": 1796 + }, + { + "epoch": 0.1681097018767258, + "loss": 1.6394013166427612, + "loss_ce": 0.004635668359696865, + "loss_iou": 0.6953125, + "loss_num": 0.048583984375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 118940184, + "step": 1796 + }, + { + "epoch": 0.16820330416062151, + "grad_norm": 26.892854690551758, + "learning_rate": 5e-05, + "loss": 1.4716, + "num_input_tokens_seen": 119006516, + "step": 1797 + }, + { + "epoch": 0.16820330416062151, + "loss": 1.5686166286468506, + "loss_ce": 0.006116572301834822, + "loss_iou": 0.65234375, + "loss_num": 0.051513671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 119006516, + "step": 1797 + }, + { + "epoch": 0.16829690644451725, + "grad_norm": 21.019927978515625, + "learning_rate": 5e-05, + "loss": 1.5484, + "num_input_tokens_seen": 119073728, + "step": 1798 + }, + { + "epoch": 0.16829690644451725, + "loss": 1.622941255569458, + "loss_ce": 0.0028240878600627184, + "loss_iou": 0.6875, + "loss_num": 0.048583984375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 119073728, + "step": 1798 + }, + { + "epoch": 0.16839050872841296, + "grad_norm": 22.851734161376953, + "learning_rate": 5e-05, + "loss": 1.3609, + "num_input_tokens_seen": 119140208, + "step": 1799 + }, + { + "epoch": 0.16839050872841296, + "loss": 1.2709921598434448, + "loss_ce": 0.003902365453541279, + "loss_iou": 0.53515625, + "loss_num": 0.039794921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 119140208, + "step": 1799 + }, + { + "epoch": 0.1684841110123087, + "grad_norm": 46.07211685180664, + "learning_rate": 5e-05, + "loss": 1.5127, + "num_input_tokens_seen": 119207072, + "step": 1800 + }, + { + "epoch": 0.1684841110123087, + "loss": 1.3361321687698364, + "loss_ce": 0.00873957946896553, + "loss_iou": 0.55859375, + "loss_num": 0.0419921875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 119207072, + "step": 1800 + }, + { + "epoch": 0.16857771329620444, + "grad_norm": 12.440570831298828, + "learning_rate": 5e-05, + "loss": 1.1381, + "num_input_tokens_seen": 119273512, + "step": 1801 + }, + { + "epoch": 0.16857771329620444, + "loss": 1.0420058965682983, + "loss_ce": 0.0026992480270564556, + "loss_iou": 0.451171875, + "loss_num": 0.0274658203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 119273512, + "step": 1801 + }, + { + "epoch": 0.16867131558010015, + "grad_norm": 18.88530731201172, + "learning_rate": 5e-05, + "loss": 1.4581, + "num_input_tokens_seen": 119339200, + "step": 1802 + }, + { + "epoch": 0.16867131558010015, + "loss": 1.5958948135375977, + "loss_ce": 0.003121304791420698, + "loss_iou": 0.6796875, + "loss_num": 0.046142578125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 119339200, + "step": 1802 + }, + { + "epoch": 0.1687649178639959, + "grad_norm": 25.671167373657227, + "learning_rate": 5e-05, + "loss": 1.285, + "num_input_tokens_seen": 119405328, + "step": 1803 + }, + { + "epoch": 0.1687649178639959, + "loss": 1.2431070804595947, + "loss_ce": 0.004581674467772245, + "loss_iou": 0.5234375, + "loss_num": 0.038330078125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 119405328, + "step": 1803 + }, + { + "epoch": 0.1688585201478916, + "grad_norm": 31.151432037353516, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 119471152, + "step": 1804 + }, + { + "epoch": 0.1688585201478916, + "loss": 1.3681247234344482, + "loss_ce": 0.006308273877948523, + "loss_iou": 0.53125, + "loss_num": 0.059326171875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 119471152, + "step": 1804 + }, + { + "epoch": 0.16895212243178734, + "grad_norm": 29.523393630981445, + "learning_rate": 5e-05, + "loss": 1.3213, + "num_input_tokens_seen": 119536220, + "step": 1805 + }, + { + "epoch": 0.16895212243178734, + "loss": 1.1777355670928955, + "loss_ce": 0.004639958962798119, + "loss_iou": 0.5078125, + "loss_num": 0.031982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 119536220, + "step": 1805 + }, + { + "epoch": 0.16904572471568305, + "grad_norm": 22.126577377319336, + "learning_rate": 5e-05, + "loss": 1.555, + "num_input_tokens_seen": 119602048, + "step": 1806 + }, + { + "epoch": 0.16904572471568305, + "loss": 1.6079837083816528, + "loss_ce": 0.006909474730491638, + "loss_iou": 0.69140625, + "loss_num": 0.04296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 119602048, + "step": 1806 + }, + { + "epoch": 0.1691393269995788, + "grad_norm": 9.718029022216797, + "learning_rate": 5e-05, + "loss": 1.0901, + "num_input_tokens_seen": 119667488, + "step": 1807 + }, + { + "epoch": 0.1691393269995788, + "loss": 1.0904566049575806, + "loss_ce": 0.00891362875699997, + "loss_iou": 0.455078125, + "loss_num": 0.034423828125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 119667488, + "step": 1807 + }, + { + "epoch": 0.16923292928347453, + "grad_norm": 13.937957763671875, + "learning_rate": 5e-05, + "loss": 1.1378, + "num_input_tokens_seen": 119733132, + "step": 1808 + }, + { + "epoch": 0.16923292928347453, + "loss": 1.1642824411392212, + "loss_ce": 0.010046683251857758, + "loss_iou": 0.4609375, + "loss_num": 0.046142578125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 119733132, + "step": 1808 + }, + { + "epoch": 0.16932653156737024, + "grad_norm": 19.477317810058594, + "learning_rate": 5e-05, + "loss": 1.3858, + "num_input_tokens_seen": 119799404, + "step": 1809 + }, + { + "epoch": 0.16932653156737024, + "loss": 1.250647783279419, + "loss_ce": 0.0072395941242575645, + "loss_iou": 0.53515625, + "loss_num": 0.035400390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 119799404, + "step": 1809 + }, + { + "epoch": 0.16942013385126597, + "grad_norm": 27.120983123779297, + "learning_rate": 5e-05, + "loss": 1.375, + "num_input_tokens_seen": 119866148, + "step": 1810 + }, + { + "epoch": 0.16942013385126597, + "loss": 1.3680001497268677, + "loss_ce": 0.0025826841592788696, + "loss_iou": 0.56640625, + "loss_num": 0.046142578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 119866148, + "step": 1810 + }, + { + "epoch": 0.16951373613516169, + "grad_norm": 19.939014434814453, + "learning_rate": 5e-05, + "loss": 1.3369, + "num_input_tokens_seen": 119932400, + "step": 1811 + }, + { + "epoch": 0.16951373613516169, + "loss": 1.4508637189865112, + "loss_ce": 0.006527773104608059, + "loss_iou": 0.64453125, + "loss_num": 0.03173828125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 119932400, + "step": 1811 + }, + { + "epoch": 0.16960733841905742, + "grad_norm": 47.441558837890625, + "learning_rate": 5e-05, + "loss": 1.3947, + "num_input_tokens_seen": 119998880, + "step": 1812 + }, + { + "epoch": 0.16960733841905742, + "loss": 1.3162851333618164, + "loss_ce": 0.0028085997328162193, + "loss_iou": 0.5625, + "loss_num": 0.037841796875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 119998880, + "step": 1812 + }, + { + "epoch": 0.16970094070295316, + "grad_norm": 17.63170051574707, + "learning_rate": 5e-05, + "loss": 1.4194, + "num_input_tokens_seen": 120064988, + "step": 1813 + }, + { + "epoch": 0.16970094070295316, + "loss": 1.4789669513702393, + "loss_ce": 0.005334258079528809, + "loss_iou": 0.59375, + "loss_num": 0.056640625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 120064988, + "step": 1813 + }, + { + "epoch": 0.16979454298684887, + "grad_norm": 39.17177963256836, + "learning_rate": 5e-05, + "loss": 1.3649, + "num_input_tokens_seen": 120131336, + "step": 1814 + }, + { + "epoch": 0.16979454298684887, + "loss": 1.5063436031341553, + "loss_ce": 0.003657993394881487, + "loss_iou": 0.65234375, + "loss_num": 0.0400390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 120131336, + "step": 1814 + }, + { + "epoch": 0.1698881452707446, + "grad_norm": 17.9035587310791, + "learning_rate": 5e-05, + "loss": 1.6655, + "num_input_tokens_seen": 120197392, + "step": 1815 + }, + { + "epoch": 0.1698881452707446, + "loss": 1.4572584629058838, + "loss_ce": 0.0041333818808197975, + "loss_iou": 0.62890625, + "loss_num": 0.039306640625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 120197392, + "step": 1815 + }, + { + "epoch": 0.16998174755464032, + "grad_norm": 20.34004020690918, + "learning_rate": 5e-05, + "loss": 1.2102, + "num_input_tokens_seen": 120263592, + "step": 1816 + }, + { + "epoch": 0.16998174755464032, + "loss": 0.9859544038772583, + "loss_ce": 0.0032884140964597464, + "loss_iou": 0.43359375, + "loss_num": 0.0228271484375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 120263592, + "step": 1816 + }, + { + "epoch": 0.17007534983853606, + "grad_norm": 41.65287399291992, + "learning_rate": 5e-05, + "loss": 1.5414, + "num_input_tokens_seen": 120331300, + "step": 1817 + }, + { + "epoch": 0.17007534983853606, + "loss": 1.572178840637207, + "loss_ce": 0.003819531761109829, + "loss_iou": 0.66796875, + "loss_num": 0.047607421875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 120331300, + "step": 1817 + }, + { + "epoch": 0.1701689521224318, + "grad_norm": 19.289501190185547, + "learning_rate": 5e-05, + "loss": 1.6966, + "num_input_tokens_seen": 120396736, + "step": 1818 + }, + { + "epoch": 0.1701689521224318, + "loss": 1.491598129272461, + "loss_ce": 0.005269893445074558, + "loss_iou": 0.6953125, + "loss_num": 0.0189208984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 120396736, + "step": 1818 + }, + { + "epoch": 0.1702625544063275, + "grad_norm": 25.355976104736328, + "learning_rate": 5e-05, + "loss": 1.6667, + "num_input_tokens_seen": 120462384, + "step": 1819 + }, + { + "epoch": 0.1702625544063275, + "loss": 1.6338341236114502, + "loss_ce": 0.009322349913418293, + "loss_iou": 0.62890625, + "loss_num": 0.072265625, + "loss_xval": 1.625, + "num_input_tokens_seen": 120462384, + "step": 1819 + }, + { + "epoch": 0.17035615669022325, + "grad_norm": 26.46224594116211, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 120527944, + "step": 1820 + }, + { + "epoch": 0.17035615669022325, + "loss": 1.0851386785507202, + "loss_ce": 0.001886697020381689, + "loss_iou": 0.47265625, + "loss_num": 0.027587890625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 120527944, + "step": 1820 + }, + { + "epoch": 0.17044975897411896, + "grad_norm": 21.288166046142578, + "learning_rate": 5e-05, + "loss": 1.5281, + "num_input_tokens_seen": 120594564, + "step": 1821 + }, + { + "epoch": 0.17044975897411896, + "loss": 1.4423449039459229, + "loss_ce": 0.007042068988084793, + "loss_iou": 0.6171875, + "loss_num": 0.041015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 120594564, + "step": 1821 + }, + { + "epoch": 0.1705433612580147, + "grad_norm": 27.84840965270996, + "learning_rate": 5e-05, + "loss": 1.5046, + "num_input_tokens_seen": 120660840, + "step": 1822 + }, + { + "epoch": 0.1705433612580147, + "loss": 1.4361482858657837, + "loss_ce": 0.004995943978428841, + "loss_iou": 0.59375, + "loss_num": 0.048583984375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 120660840, + "step": 1822 + }, + { + "epoch": 0.17063696354191044, + "grad_norm": 25.643396377563477, + "learning_rate": 5e-05, + "loss": 1.4685, + "num_input_tokens_seen": 120726708, + "step": 1823 + }, + { + "epoch": 0.17063696354191044, + "loss": 1.4662539958953857, + "loss_ce": 0.005804789252579212, + "loss_iou": 0.625, + "loss_num": 0.042236328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 120726708, + "step": 1823 + }, + { + "epoch": 0.17073056582580615, + "grad_norm": 17.447202682495117, + "learning_rate": 5e-05, + "loss": 1.3424, + "num_input_tokens_seen": 120792628, + "step": 1824 + }, + { + "epoch": 0.17073056582580615, + "loss": 1.3110202550888062, + "loss_ce": 0.005356179550290108, + "loss_iou": 0.5234375, + "loss_num": 0.052001953125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 120792628, + "step": 1824 + }, + { + "epoch": 0.17082416810970188, + "grad_norm": 13.089118003845215, + "learning_rate": 5e-05, + "loss": 1.3288, + "num_input_tokens_seen": 120857956, + "step": 1825 + }, + { + "epoch": 0.17082416810970188, + "loss": 1.1957464218139648, + "loss_ce": 0.002386966720223427, + "loss_iou": 0.462890625, + "loss_num": 0.053466796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 120857956, + "step": 1825 + }, + { + "epoch": 0.1709177703935976, + "grad_norm": 24.77213478088379, + "learning_rate": 5e-05, + "loss": 1.3773, + "num_input_tokens_seen": 120924276, + "step": 1826 + }, + { + "epoch": 0.1709177703935976, + "loss": 1.726757287979126, + "loss_ce": 0.0031245131976902485, + "loss_iou": 0.66796875, + "loss_num": 0.0771484375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 120924276, + "step": 1826 + }, + { + "epoch": 0.17101137267749333, + "grad_norm": 26.462329864501953, + "learning_rate": 5e-05, + "loss": 1.6389, + "num_input_tokens_seen": 120991344, + "step": 1827 + }, + { + "epoch": 0.17101137267749333, + "loss": 1.6456923484802246, + "loss_ce": 0.002137661213055253, + "loss_iou": 0.74609375, + "loss_num": 0.031005859375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 120991344, + "step": 1827 + }, + { + "epoch": 0.17110497496138904, + "grad_norm": 21.92740821838379, + "learning_rate": 5e-05, + "loss": 1.5548, + "num_input_tokens_seen": 121058888, + "step": 1828 + }, + { + "epoch": 0.17110497496138904, + "loss": 1.5315115451812744, + "loss_ce": 0.0051443311385810375, + "loss_iou": 0.6875, + "loss_num": 0.0299072265625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 121058888, + "step": 1828 + }, + { + "epoch": 0.17119857724528478, + "grad_norm": 20.751903533935547, + "learning_rate": 5e-05, + "loss": 1.3661, + "num_input_tokens_seen": 121124576, + "step": 1829 + }, + { + "epoch": 0.17119857724528478, + "loss": 1.343775749206543, + "loss_ce": 0.008326517418026924, + "loss_iou": 0.51953125, + "loss_num": 0.059326171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 121124576, + "step": 1829 + }, + { + "epoch": 0.17129217952918052, + "grad_norm": 79.73170471191406, + "learning_rate": 5e-05, + "loss": 1.6314, + "num_input_tokens_seen": 121190060, + "step": 1830 + }, + { + "epoch": 0.17129217952918052, + "loss": 1.396977186203003, + "loss_ce": 0.005375603213906288, + "loss_iou": 0.6328125, + "loss_num": 0.0263671875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 121190060, + "step": 1830 + }, + { + "epoch": 0.17138578181307623, + "grad_norm": 14.038053512573242, + "learning_rate": 5e-05, + "loss": 1.2064, + "num_input_tokens_seen": 121257456, + "step": 1831 + }, + { + "epoch": 0.17138578181307623, + "loss": 1.0631022453308105, + "loss_ce": 0.003532049711793661, + "loss_iou": 0.44921875, + "loss_num": 0.0322265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 121257456, + "step": 1831 + }, + { + "epoch": 0.17147938409697197, + "grad_norm": 18.470840454101562, + "learning_rate": 5e-05, + "loss": 1.4822, + "num_input_tokens_seen": 121323072, + "step": 1832 + }, + { + "epoch": 0.17147938409697197, + "loss": 1.5085363388061523, + "loss_ce": 0.003653568448498845, + "loss_iou": 0.65625, + "loss_num": 0.039306640625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 121323072, + "step": 1832 + }, + { + "epoch": 0.17157298638086768, + "grad_norm": 39.324737548828125, + "learning_rate": 5e-05, + "loss": 1.5217, + "num_input_tokens_seen": 121389772, + "step": 1833 + }, + { + "epoch": 0.17157298638086768, + "loss": 1.4324517250061035, + "loss_ce": 0.004717256408184767, + "loss_iou": 0.6328125, + "loss_num": 0.03271484375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 121389772, + "step": 1833 + }, + { + "epoch": 0.17166658866476342, + "grad_norm": 16.263824462890625, + "learning_rate": 5e-05, + "loss": 1.7437, + "num_input_tokens_seen": 121455420, + "step": 1834 + }, + { + "epoch": 0.17166658866476342, + "loss": 1.6695587635040283, + "loss_ce": 0.0025665597058832645, + "loss_iou": 0.72265625, + "loss_num": 0.0439453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 121455420, + "step": 1834 + }, + { + "epoch": 0.17176019094865916, + "grad_norm": 20.818655014038086, + "learning_rate": 5e-05, + "loss": 1.5081, + "num_input_tokens_seen": 121521596, + "step": 1835 + }, + { + "epoch": 0.17176019094865916, + "loss": 1.6034801006317139, + "loss_ce": 0.006312092300504446, + "loss_iou": 0.671875, + "loss_num": 0.05029296875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 121521596, + "step": 1835 + }, + { + "epoch": 0.17185379323255487, + "grad_norm": 24.824081420898438, + "learning_rate": 5e-05, + "loss": 1.6034, + "num_input_tokens_seen": 121584892, + "step": 1836 + }, + { + "epoch": 0.17185379323255487, + "loss": 1.5662217140197754, + "loss_ce": 0.005674734711647034, + "loss_iou": 0.65625, + "loss_num": 0.0498046875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 121584892, + "step": 1836 + }, + { + "epoch": 0.1719473955164506, + "grad_norm": 25.136316299438477, + "learning_rate": 5e-05, + "loss": 1.6328, + "num_input_tokens_seen": 121652308, + "step": 1837 + }, + { + "epoch": 0.1719473955164506, + "loss": 1.5209910869598389, + "loss_ce": 0.005366027355194092, + "loss_iou": 0.66015625, + "loss_num": 0.039306640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 121652308, + "step": 1837 + }, + { + "epoch": 0.17204099780034632, + "grad_norm": 32.42887496948242, + "learning_rate": 5e-05, + "loss": 1.2909, + "num_input_tokens_seen": 121718660, + "step": 1838 + }, + { + "epoch": 0.17204099780034632, + "loss": 1.1097770929336548, + "loss_ce": 0.004705031868070364, + "loss_iou": 0.4765625, + "loss_num": 0.0302734375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 121718660, + "step": 1838 + }, + { + "epoch": 0.17213460008424206, + "grad_norm": 17.299095153808594, + "learning_rate": 5e-05, + "loss": 1.8605, + "num_input_tokens_seen": 121784776, + "step": 1839 + }, + { + "epoch": 0.17213460008424206, + "loss": 1.9374628067016602, + "loss_ce": 0.006798781454563141, + "loss_iou": 0.79296875, + "loss_num": 0.06884765625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 121784776, + "step": 1839 + }, + { + "epoch": 0.1722282023681378, + "grad_norm": 21.14668083190918, + "learning_rate": 5e-05, + "loss": 1.2216, + "num_input_tokens_seen": 121850540, + "step": 1840 + }, + { + "epoch": 0.1722282023681378, + "loss": 1.1206915378570557, + "loss_ce": 0.0027715619653463364, + "loss_iou": 0.5078125, + "loss_num": 0.019775390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 121850540, + "step": 1840 + }, + { + "epoch": 0.1723218046520335, + "grad_norm": 34.61922836303711, + "learning_rate": 5e-05, + "loss": 1.5579, + "num_input_tokens_seen": 121916300, + "step": 1841 + }, + { + "epoch": 0.1723218046520335, + "loss": 1.5194497108459473, + "loss_ce": 0.002359856851398945, + "loss_iou": 0.62890625, + "loss_num": 0.052734375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 121916300, + "step": 1841 + }, + { + "epoch": 0.17241540693592924, + "grad_norm": 20.034870147705078, + "learning_rate": 5e-05, + "loss": 1.7421, + "num_input_tokens_seen": 121982672, + "step": 1842 + }, + { + "epoch": 0.17241540693592924, + "loss": 1.7515795230865479, + "loss_ce": 0.005974119529128075, + "loss_iou": 0.7109375, + "loss_num": 0.064453125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 121982672, + "step": 1842 + }, + { + "epoch": 0.17250900921982495, + "grad_norm": 20.295759201049805, + "learning_rate": 5e-05, + "loss": 1.3312, + "num_input_tokens_seen": 122049272, + "step": 1843 + }, + { + "epoch": 0.17250900921982495, + "loss": 1.3228318691253662, + "loss_ce": 0.005449024029076099, + "loss_iou": 0.57421875, + "loss_num": 0.03369140625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 122049272, + "step": 1843 + }, + { + "epoch": 0.1726026115037207, + "grad_norm": 30.42950439453125, + "learning_rate": 5e-05, + "loss": 1.4602, + "num_input_tokens_seen": 122114820, + "step": 1844 + }, + { + "epoch": 0.1726026115037207, + "loss": 1.266453742980957, + "loss_ce": 0.005223327316343784, + "loss_iou": 0.52734375, + "loss_num": 0.041259765625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 122114820, + "step": 1844 + }, + { + "epoch": 0.17269621378761643, + "grad_norm": 20.07485580444336, + "learning_rate": 5e-05, + "loss": 1.4999, + "num_input_tokens_seen": 122180588, + "step": 1845 + }, + { + "epoch": 0.17269621378761643, + "loss": 1.6367614269256592, + "loss_ce": 0.002972241025418043, + "loss_iou": 0.69140625, + "loss_num": 0.05078125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 122180588, + "step": 1845 + }, + { + "epoch": 0.17278981607151214, + "grad_norm": 26.721233367919922, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 122247328, + "step": 1846 + }, + { + "epoch": 0.17278981607151214, + "loss": 1.1850439310073853, + "loss_ce": 0.0014501872938126326, + "loss_iou": 0.5390625, + "loss_num": 0.021484375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 122247328, + "step": 1846 + }, + { + "epoch": 0.17288341835540788, + "grad_norm": 21.758010864257812, + "learning_rate": 5e-05, + "loss": 1.5634, + "num_input_tokens_seen": 122314604, + "step": 1847 + }, + { + "epoch": 0.17288341835540788, + "loss": 1.5600001811981201, + "loss_ce": 0.007265835534781218, + "loss_iou": 0.6328125, + "loss_num": 0.05712890625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 122314604, + "step": 1847 + }, + { + "epoch": 0.1729770206393036, + "grad_norm": 29.43509864807129, + "learning_rate": 5e-05, + "loss": 1.6338, + "num_input_tokens_seen": 122381048, + "step": 1848 + }, + { + "epoch": 0.1729770206393036, + "loss": 1.8224163055419922, + "loss_ce": 0.006010045763105154, + "loss_iou": 0.7578125, + "loss_num": 0.06005859375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 122381048, + "step": 1848 + }, + { + "epoch": 0.17307062292319933, + "grad_norm": 54.261634826660156, + "learning_rate": 5e-05, + "loss": 1.4317, + "num_input_tokens_seen": 122448296, + "step": 1849 + }, + { + "epoch": 0.17307062292319933, + "loss": 1.2774088382720947, + "loss_ce": 0.008365956135094166, + "loss_iou": 0.58203125, + "loss_num": 0.0213623046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 122448296, + "step": 1849 + }, + { + "epoch": 0.17316422520709504, + "grad_norm": 29.9807071685791, + "learning_rate": 5e-05, + "loss": 1.4884, + "num_input_tokens_seen": 122514048, + "step": 1850 + }, + { + "epoch": 0.17316422520709504, + "loss": 1.5707565546035767, + "loss_ce": 0.006791671738028526, + "loss_iou": 0.6328125, + "loss_num": 0.060302734375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 122514048, + "step": 1850 + }, + { + "epoch": 0.17325782749099078, + "grad_norm": 27.879579544067383, + "learning_rate": 5e-05, + "loss": 1.7332, + "num_input_tokens_seen": 122579968, + "step": 1851 + }, + { + "epoch": 0.17325782749099078, + "loss": 1.716784954071045, + "loss_ce": 0.006823934614658356, + "loss_iou": 0.7421875, + "loss_num": 0.045654296875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 122579968, + "step": 1851 + }, + { + "epoch": 0.17335142977488652, + "grad_norm": 33.21861267089844, + "learning_rate": 5e-05, + "loss": 1.5843, + "num_input_tokens_seen": 122646080, + "step": 1852 + }, + { + "epoch": 0.17335142977488652, + "loss": 1.7422574758529663, + "loss_ce": 0.0049528456293046474, + "loss_iou": 0.7265625, + "loss_num": 0.056884765625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 122646080, + "step": 1852 + }, + { + "epoch": 0.17344503205878223, + "grad_norm": 27.33934211730957, + "learning_rate": 5e-05, + "loss": 1.56, + "num_input_tokens_seen": 122712512, + "step": 1853 + }, + { + "epoch": 0.17344503205878223, + "loss": 1.5886988639831543, + "loss_ce": 0.004714410752058029, + "loss_iou": 0.6953125, + "loss_num": 0.039306640625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 122712512, + "step": 1853 + }, + { + "epoch": 0.17353863434267797, + "grad_norm": 24.189306259155273, + "learning_rate": 5e-05, + "loss": 1.3923, + "num_input_tokens_seen": 122778824, + "step": 1854 + }, + { + "epoch": 0.17353863434267797, + "loss": 1.3535451889038086, + "loss_ce": 0.01174830086529255, + "loss_iou": 0.55859375, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 122778824, + "step": 1854 + }, + { + "epoch": 0.17363223662657368, + "grad_norm": 35.08366394042969, + "learning_rate": 5e-05, + "loss": 1.4697, + "num_input_tokens_seen": 122844616, + "step": 1855 + }, + { + "epoch": 0.17363223662657368, + "loss": 1.5058411359786987, + "loss_ce": 0.006817711051553488, + "loss_iou": 0.66015625, + "loss_num": 0.035888671875, + "loss_xval": 1.5, + "num_input_tokens_seen": 122844616, + "step": 1855 + }, + { + "epoch": 0.17372583891046942, + "grad_norm": 28.835338592529297, + "learning_rate": 5e-05, + "loss": 1.3623, + "num_input_tokens_seen": 122909672, + "step": 1856 + }, + { + "epoch": 0.17372583891046942, + "loss": 1.3965022563934326, + "loss_ce": 0.003924140240997076, + "loss_iou": 0.59375, + "loss_num": 0.04150390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 122909672, + "step": 1856 + }, + { + "epoch": 0.17381944119436515, + "grad_norm": 20.535924911499023, + "learning_rate": 5e-05, + "loss": 1.6023, + "num_input_tokens_seen": 122975176, + "step": 1857 + }, + { + "epoch": 0.17381944119436515, + "loss": 1.4852015972137451, + "loss_ce": 0.0076626078225672245, + "loss_iou": 0.640625, + "loss_num": 0.03857421875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 122975176, + "step": 1857 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 22.818687438964844, + "learning_rate": 5e-05, + "loss": 1.2961, + "num_input_tokens_seen": 123041632, + "step": 1858 + }, + { + "epoch": 0.17391304347826086, + "loss": 1.2844634056091309, + "loss_ce": 0.006875484250485897, + "loss_iou": 0.50390625, + "loss_num": 0.0546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 123041632, + "step": 1858 + }, + { + "epoch": 0.1740066457621566, + "grad_norm": 44.3472900390625, + "learning_rate": 5e-05, + "loss": 1.5116, + "num_input_tokens_seen": 123107284, + "step": 1859 + }, + { + "epoch": 0.1740066457621566, + "loss": 1.563816785812378, + "loss_ce": 0.0052230944857001305, + "loss_iou": 0.68359375, + "loss_num": 0.037841796875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 123107284, + "step": 1859 + }, + { + "epoch": 0.1741002480460523, + "grad_norm": 25.473066329956055, + "learning_rate": 5e-05, + "loss": 1.7615, + "num_input_tokens_seen": 123173360, + "step": 1860 + }, + { + "epoch": 0.1741002480460523, + "loss": 1.6592191457748413, + "loss_ce": 0.005898888222873211, + "loss_iou": 0.75, + "loss_num": 0.03125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 123173360, + "step": 1860 + }, + { + "epoch": 0.17419385032994805, + "grad_norm": 23.157337188720703, + "learning_rate": 5e-05, + "loss": 1.4354, + "num_input_tokens_seen": 123239668, + "step": 1861 + }, + { + "epoch": 0.17419385032994805, + "loss": 1.3325526714324951, + "loss_ce": 0.0024744963739067316, + "loss_iou": 0.578125, + "loss_num": 0.0341796875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 123239668, + "step": 1861 + }, + { + "epoch": 0.1742874526138438, + "grad_norm": 23.07021141052246, + "learning_rate": 5e-05, + "loss": 1.3751, + "num_input_tokens_seen": 123307004, + "step": 1862 + }, + { + "epoch": 0.1742874526138438, + "loss": 1.138388991355896, + "loss_ce": 0.0031351184006780386, + "loss_iou": 0.5, + "loss_num": 0.02734375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 123307004, + "step": 1862 + }, + { + "epoch": 0.1743810548977395, + "grad_norm": 20.14305877685547, + "learning_rate": 5e-05, + "loss": 1.5347, + "num_input_tokens_seen": 123372628, + "step": 1863 + }, + { + "epoch": 0.1743810548977395, + "loss": 1.4409219026565552, + "loss_ce": 0.005375009961426258, + "loss_iou": 0.6171875, + "loss_num": 0.03955078125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 123372628, + "step": 1863 + }, + { + "epoch": 0.17447465718163524, + "grad_norm": 21.856277465820312, + "learning_rate": 5e-05, + "loss": 1.5748, + "num_input_tokens_seen": 123439596, + "step": 1864 + }, + { + "epoch": 0.17447465718163524, + "loss": 1.5219848155975342, + "loss_ce": 0.003918478265404701, + "loss_iou": 0.64453125, + "loss_num": 0.04638671875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 123439596, + "step": 1864 + }, + { + "epoch": 0.17456825946553095, + "grad_norm": 29.275348663330078, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 123506000, + "step": 1865 + }, + { + "epoch": 0.17456825946553095, + "loss": 1.416922926902771, + "loss_ce": 0.0038370315451174974, + "loss_iou": 0.6328125, + "loss_num": 0.029541015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 123506000, + "step": 1865 + }, + { + "epoch": 0.1746618617494267, + "grad_norm": 47.204654693603516, + "learning_rate": 5e-05, + "loss": 1.4407, + "num_input_tokens_seen": 123571656, + "step": 1866 + }, + { + "epoch": 0.1746618617494267, + "loss": 1.3536100387573242, + "loss_ce": 0.0040006255730986595, + "loss_iou": 0.59375, + "loss_num": 0.032470703125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 123571656, + "step": 1866 + }, + { + "epoch": 0.1747554640333224, + "grad_norm": 30.03777313232422, + "learning_rate": 5e-05, + "loss": 1.4577, + "num_input_tokens_seen": 123637340, + "step": 1867 + }, + { + "epoch": 0.1747554640333224, + "loss": 1.6500486135482788, + "loss_ce": 0.0016110586002469063, + "loss_iou": 0.69921875, + "loss_num": 0.049072265625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 123637340, + "step": 1867 + }, + { + "epoch": 0.17484906631721814, + "grad_norm": 27.495819091796875, + "learning_rate": 5e-05, + "loss": 1.6743, + "num_input_tokens_seen": 123703956, + "step": 1868 + }, + { + "epoch": 0.17484906631721814, + "loss": 1.6631691455841064, + "loss_ce": 0.004966053646057844, + "loss_iou": 0.73046875, + "loss_num": 0.0390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 123703956, + "step": 1868 + }, + { + "epoch": 0.17494266860111388, + "grad_norm": 22.417184829711914, + "learning_rate": 5e-05, + "loss": 1.2335, + "num_input_tokens_seen": 123769896, + "step": 1869 + }, + { + "epoch": 0.17494266860111388, + "loss": 1.2517322301864624, + "loss_ce": 0.00515022873878479, + "loss_iou": 0.546875, + "loss_num": 0.03076171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 123769896, + "step": 1869 + }, + { + "epoch": 0.1750362708850096, + "grad_norm": 16.17466926574707, + "learning_rate": 5e-05, + "loss": 1.6097, + "num_input_tokens_seen": 123836928, + "step": 1870 + }, + { + "epoch": 0.1750362708850096, + "loss": 1.4315295219421387, + "loss_ce": 0.0052599553018808365, + "loss_iou": 0.58984375, + "loss_num": 0.049560546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 123836928, + "step": 1870 + }, + { + "epoch": 0.17512987316890533, + "grad_norm": 16.639633178710938, + "learning_rate": 5e-05, + "loss": 1.3924, + "num_input_tokens_seen": 123903860, + "step": 1871 + }, + { + "epoch": 0.17512987316890533, + "loss": 1.237128496170044, + "loss_ce": 0.0008003418333828449, + "loss_iou": 0.55078125, + "loss_num": 0.0269775390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 123903860, + "step": 1871 + }, + { + "epoch": 0.17522347545280104, + "grad_norm": 15.891419410705566, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 123970420, + "step": 1872 + }, + { + "epoch": 0.17522347545280104, + "loss": 1.1824283599853516, + "loss_ce": 0.0051822601817548275, + "loss_iou": 0.53125, + "loss_num": 0.023193359375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 123970420, + "step": 1872 + }, + { + "epoch": 0.17531707773669677, + "grad_norm": 28.685651779174805, + "learning_rate": 5e-05, + "loss": 1.6561, + "num_input_tokens_seen": 124036516, + "step": 1873 + }, + { + "epoch": 0.17531707773669677, + "loss": 1.8574066162109375, + "loss_ce": 0.00242615956813097, + "loss_iou": 0.765625, + "loss_num": 0.06396484375, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 124036516, + "step": 1873 + }, + { + "epoch": 0.1754106800205925, + "grad_norm": 13.578001022338867, + "learning_rate": 5e-05, + "loss": 1.4323, + "num_input_tokens_seen": 124102304, + "step": 1874 + }, + { + "epoch": 0.1754106800205925, + "loss": 1.725637674331665, + "loss_ce": 0.003957986831665039, + "loss_iou": 0.7265625, + "loss_num": 0.052734375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 124102304, + "step": 1874 + }, + { + "epoch": 0.17550428230448822, + "grad_norm": 13.442028045654297, + "learning_rate": 5e-05, + "loss": 1.5378, + "num_input_tokens_seen": 124169080, + "step": 1875 + }, + { + "epoch": 0.17550428230448822, + "loss": 1.678246021270752, + "loss_ce": 0.010277180932462215, + "loss_iou": 0.6953125, + "loss_num": 0.05517578125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 124169080, + "step": 1875 + }, + { + "epoch": 0.17559788458838396, + "grad_norm": 39.67034912109375, + "learning_rate": 5e-05, + "loss": 1.3669, + "num_input_tokens_seen": 124235620, + "step": 1876 + }, + { + "epoch": 0.17559788458838396, + "loss": 1.5303378105163574, + "loss_ce": 0.003970663528889418, + "loss_iou": 0.62890625, + "loss_num": 0.053466796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 124235620, + "step": 1876 + }, + { + "epoch": 0.17569148687227967, + "grad_norm": 18.186180114746094, + "learning_rate": 5e-05, + "loss": 1.253, + "num_input_tokens_seen": 124302224, + "step": 1877 + }, + { + "epoch": 0.17569148687227967, + "loss": 1.2854080200195312, + "loss_ce": 0.004646319895982742, + "loss_iou": 0.58203125, + "loss_num": 0.0238037109375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 124302224, + "step": 1877 + }, + { + "epoch": 0.1757850891561754, + "grad_norm": 36.3538932800293, + "learning_rate": 5e-05, + "loss": 1.4756, + "num_input_tokens_seen": 124368216, + "step": 1878 + }, + { + "epoch": 0.1757850891561754, + "loss": 1.2984278202056885, + "loss_ce": 0.006435736082494259, + "loss_iou": 0.546875, + "loss_num": 0.0400390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 124368216, + "step": 1878 + }, + { + "epoch": 0.17587869144007115, + "grad_norm": 21.211565017700195, + "learning_rate": 5e-05, + "loss": 1.6729, + "num_input_tokens_seen": 124434272, + "step": 1879 + }, + { + "epoch": 0.17587869144007115, + "loss": 1.5440338850021362, + "loss_ce": 0.0037505985237658024, + "loss_iou": 0.640625, + "loss_num": 0.051025390625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 124434272, + "step": 1879 + }, + { + "epoch": 0.17597229372396686, + "grad_norm": 22.88192367553711, + "learning_rate": 5e-05, + "loss": 1.4055, + "num_input_tokens_seen": 124500280, + "step": 1880 + }, + { + "epoch": 0.17597229372396686, + "loss": 1.4746317863464355, + "loss_ce": 0.005393535830080509, + "loss_iou": 0.5703125, + "loss_num": 0.06591796875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 124500280, + "step": 1880 + }, + { + "epoch": 0.1760658960078626, + "grad_norm": 42.942787170410156, + "learning_rate": 5e-05, + "loss": 1.7371, + "num_input_tokens_seen": 124566792, + "step": 1881 + }, + { + "epoch": 0.1760658960078626, + "loss": 1.6196497678756714, + "loss_ce": 0.0034388331696391106, + "loss_iou": 0.703125, + "loss_num": 0.04150390625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 124566792, + "step": 1881 + }, + { + "epoch": 0.1761594982917583, + "grad_norm": 20.981191635131836, + "learning_rate": 5e-05, + "loss": 1.727, + "num_input_tokens_seen": 124633316, + "step": 1882 + }, + { + "epoch": 0.1761594982917583, + "loss": 1.6363775730133057, + "loss_ce": 0.004785794764757156, + "loss_iou": 0.6953125, + "loss_num": 0.047607421875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 124633316, + "step": 1882 + }, + { + "epoch": 0.17625310057565405, + "grad_norm": 43.8437614440918, + "learning_rate": 5e-05, + "loss": 1.4488, + "num_input_tokens_seen": 124700520, + "step": 1883 + }, + { + "epoch": 0.17625310057565405, + "loss": 1.471380591392517, + "loss_ce": 0.0011657995637506247, + "loss_iou": 0.62109375, + "loss_num": 0.045654296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 124700520, + "step": 1883 + }, + { + "epoch": 0.1763467028595498, + "grad_norm": 25.370458602905273, + "learning_rate": 5e-05, + "loss": 1.2918, + "num_input_tokens_seen": 124767320, + "step": 1884 + }, + { + "epoch": 0.1763467028595498, + "loss": 1.2371501922607422, + "loss_ce": 0.006681433413177729, + "loss_iou": 0.52734375, + "loss_num": 0.035888671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 124767320, + "step": 1884 + }, + { + "epoch": 0.1764403051434455, + "grad_norm": 20.513080596923828, + "learning_rate": 5e-05, + "loss": 1.5742, + "num_input_tokens_seen": 124832456, + "step": 1885 + }, + { + "epoch": 0.1764403051434455, + "loss": 1.465209722518921, + "loss_ce": 0.005737133789807558, + "loss_iou": 0.63671875, + "loss_num": 0.037841796875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 124832456, + "step": 1885 + }, + { + "epoch": 0.17653390742734124, + "grad_norm": 43.855838775634766, + "learning_rate": 5e-05, + "loss": 1.3493, + "num_input_tokens_seen": 124899228, + "step": 1886 + }, + { + "epoch": 0.17653390742734124, + "loss": 1.4110389947891235, + "loss_ce": 0.006742175668478012, + "loss_iou": 0.61328125, + "loss_num": 0.035400390625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 124899228, + "step": 1886 + }, + { + "epoch": 0.17662750971123695, + "grad_norm": 12.336699485778809, + "learning_rate": 5e-05, + "loss": 1.3108, + "num_input_tokens_seen": 124966564, + "step": 1887 + }, + { + "epoch": 0.17662750971123695, + "loss": 1.3217785358428955, + "loss_ce": 0.005372196435928345, + "loss_iou": 0.56640625, + "loss_num": 0.03662109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 124966564, + "step": 1887 + }, + { + "epoch": 0.17672111199513268, + "grad_norm": 22.85047149658203, + "learning_rate": 5e-05, + "loss": 1.473, + "num_input_tokens_seen": 125031436, + "step": 1888 + }, + { + "epoch": 0.17672111199513268, + "loss": 1.7604337930679321, + "loss_ce": 0.007504129782319069, + "loss_iou": 0.7109375, + "loss_num": 0.06640625, + "loss_xval": 1.75, + "num_input_tokens_seen": 125031436, + "step": 1888 + }, + { + "epoch": 0.1768147142790284, + "grad_norm": 74.69544982910156, + "learning_rate": 5e-05, + "loss": 1.4932, + "num_input_tokens_seen": 125098176, + "step": 1889 + }, + { + "epoch": 0.1768147142790284, + "loss": 1.30013108253479, + "loss_ce": 0.0032560420222580433, + "loss_iou": 0.5625, + "loss_num": 0.03369140625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 125098176, + "step": 1889 + }, + { + "epoch": 0.17690831656292413, + "grad_norm": 22.372865676879883, + "learning_rate": 5e-05, + "loss": 1.3025, + "num_input_tokens_seen": 125165752, + "step": 1890 + }, + { + "epoch": 0.17690831656292413, + "loss": 1.313957691192627, + "loss_ce": 0.004387363791465759, + "loss_iou": 0.55859375, + "loss_num": 0.0390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 125165752, + "step": 1890 + }, + { + "epoch": 0.17700191884681987, + "grad_norm": 33.97995376586914, + "learning_rate": 5e-05, + "loss": 1.2417, + "num_input_tokens_seen": 125232416, + "step": 1891 + }, + { + "epoch": 0.17700191884681987, + "loss": 1.2535755634307861, + "loss_ce": 0.00650528259575367, + "loss_iou": 0.5546875, + "loss_num": 0.02783203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 125232416, + "step": 1891 + }, + { + "epoch": 0.17709552113071558, + "grad_norm": 31.841249465942383, + "learning_rate": 5e-05, + "loss": 1.5356, + "num_input_tokens_seen": 125299152, + "step": 1892 + }, + { + "epoch": 0.17709552113071558, + "loss": 1.565967321395874, + "loss_ce": 0.004443918354809284, + "loss_iou": 0.63671875, + "loss_num": 0.057861328125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 125299152, + "step": 1892 + }, + { + "epoch": 0.17718912341461132, + "grad_norm": 19.4390926361084, + "learning_rate": 5e-05, + "loss": 1.6589, + "num_input_tokens_seen": 125365168, + "step": 1893 + }, + { + "epoch": 0.17718912341461132, + "loss": 1.5728219747543335, + "loss_ce": 0.004462606273591518, + "loss_iou": 0.671875, + "loss_num": 0.04443359375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 125365168, + "step": 1893 + }, + { + "epoch": 0.17728272569850703, + "grad_norm": 12.243718147277832, + "learning_rate": 5e-05, + "loss": 1.3969, + "num_input_tokens_seen": 125431516, + "step": 1894 + }, + { + "epoch": 0.17728272569850703, + "loss": 1.1028046607971191, + "loss_ce": 0.006124977953732014, + "loss_iou": 0.46484375, + "loss_num": 0.032958984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 125431516, + "step": 1894 + }, + { + "epoch": 0.17737632798240277, + "grad_norm": 18.179861068725586, + "learning_rate": 5e-05, + "loss": 1.3818, + "num_input_tokens_seen": 125497240, + "step": 1895 + }, + { + "epoch": 0.17737632798240277, + "loss": 1.4240645170211792, + "loss_ce": 0.00463092653080821, + "loss_iou": 0.546875, + "loss_num": 0.06494140625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 125497240, + "step": 1895 + }, + { + "epoch": 0.1774699302662985, + "grad_norm": 17.810461044311523, + "learning_rate": 5e-05, + "loss": 1.6627, + "num_input_tokens_seen": 125562696, + "step": 1896 + }, + { + "epoch": 0.1774699302662985, + "loss": 1.6232903003692627, + "loss_ce": 0.005187239032238722, + "loss_iou": 0.66015625, + "loss_num": 0.058837890625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 125562696, + "step": 1896 + }, + { + "epoch": 0.17756353255019422, + "grad_norm": 19.378154754638672, + "learning_rate": 5e-05, + "loss": 1.4421, + "num_input_tokens_seen": 125629324, + "step": 1897 + }, + { + "epoch": 0.17756353255019422, + "loss": 1.4215199947357178, + "loss_ce": 0.006480968091636896, + "loss_iou": 0.5859375, + "loss_num": 0.0478515625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 125629324, + "step": 1897 + }, + { + "epoch": 0.17765713483408996, + "grad_norm": 22.260164260864258, + "learning_rate": 5e-05, + "loss": 1.6494, + "num_input_tokens_seen": 125694512, + "step": 1898 + }, + { + "epoch": 0.17765713483408996, + "loss": 1.6587601900100708, + "loss_ce": 0.005439842119812965, + "loss_iou": 0.69921875, + "loss_num": 0.0517578125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 125694512, + "step": 1898 + }, + { + "epoch": 0.17775073711798567, + "grad_norm": 23.74010467529297, + "learning_rate": 5e-05, + "loss": 1.3235, + "num_input_tokens_seen": 125760940, + "step": 1899 + }, + { + "epoch": 0.17775073711798567, + "loss": 1.5131752490997314, + "loss_ce": 0.007315901108086109, + "loss_iou": 0.6328125, + "loss_num": 0.047607421875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 125760940, + "step": 1899 + }, + { + "epoch": 0.1778443394018814, + "grad_norm": 16.91720199584961, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 125825884, + "step": 1900 + }, + { + "epoch": 0.1778443394018814, + "loss": 1.4790034294128418, + "loss_ce": 0.0034785200841724873, + "loss_iou": 0.63671875, + "loss_num": 0.04052734375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 125825884, + "step": 1900 + }, + { + "epoch": 0.17793794168577715, + "grad_norm": 18.59794807434082, + "learning_rate": 5e-05, + "loss": 1.1832, + "num_input_tokens_seen": 125891372, + "step": 1901 + }, + { + "epoch": 0.17793794168577715, + "loss": 1.0883831977844238, + "loss_ce": 0.0037275196518749, + "loss_iou": 0.462890625, + "loss_num": 0.031982421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 125891372, + "step": 1901 + }, + { + "epoch": 0.17803154396967286, + "grad_norm": 16.934354782104492, + "learning_rate": 5e-05, + "loss": 1.4211, + "num_input_tokens_seen": 125957712, + "step": 1902 + }, + { + "epoch": 0.17803154396967286, + "loss": 1.2986729145050049, + "loss_ce": 0.0032626825850456953, + "loss_iou": 0.5703125, + "loss_num": 0.0308837890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 125957712, + "step": 1902 + }, + { + "epoch": 0.1781251462535686, + "grad_norm": 17.2276668548584, + "learning_rate": 5e-05, + "loss": 1.2597, + "num_input_tokens_seen": 126023736, + "step": 1903 + }, + { + "epoch": 0.1781251462535686, + "loss": 1.0219440460205078, + "loss_ce": 0.0024127333890646696, + "loss_iou": 0.46875, + "loss_num": 0.0167236328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 126023736, + "step": 1903 + }, + { + "epoch": 0.1782187485374643, + "grad_norm": 21.607051849365234, + "learning_rate": 5e-05, + "loss": 1.5113, + "num_input_tokens_seen": 126090644, + "step": 1904 + }, + { + "epoch": 0.1782187485374643, + "loss": 1.3699097633361816, + "loss_ce": 0.005163642577826977, + "loss_iou": 0.54296875, + "loss_num": 0.05517578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 126090644, + "step": 1904 + }, + { + "epoch": 0.17831235082136004, + "grad_norm": 36.899269104003906, + "learning_rate": 5e-05, + "loss": 1.6401, + "num_input_tokens_seen": 126156352, + "step": 1905 + }, + { + "epoch": 0.17831235082136004, + "loss": 1.6202868223190308, + "loss_ce": 0.0030993036925792694, + "loss_iou": 0.671875, + "loss_num": 0.053955078125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 126156352, + "step": 1905 + }, + { + "epoch": 0.17840595310525575, + "grad_norm": 16.8770751953125, + "learning_rate": 5e-05, + "loss": 1.6128, + "num_input_tokens_seen": 126222444, + "step": 1906 + }, + { + "epoch": 0.17840595310525575, + "loss": 1.82246732711792, + "loss_ce": 0.00410798192024231, + "loss_iou": 0.8125, + "loss_num": 0.038818359375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 126222444, + "step": 1906 + }, + { + "epoch": 0.1784995553891515, + "grad_norm": 21.94072723388672, + "learning_rate": 5e-05, + "loss": 1.4607, + "num_input_tokens_seen": 126288764, + "step": 1907 + }, + { + "epoch": 0.1784995553891515, + "loss": 1.4229366779327393, + "loss_ce": 0.00447956845164299, + "loss_iou": 0.609375, + "loss_num": 0.040771484375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 126288764, + "step": 1907 + }, + { + "epoch": 0.17859315767304723, + "grad_norm": 38.17008972167969, + "learning_rate": 5e-05, + "loss": 1.4479, + "num_input_tokens_seen": 126355220, + "step": 1908 + }, + { + "epoch": 0.17859315767304723, + "loss": 1.4375016689300537, + "loss_ce": 0.003419750602915883, + "loss_iou": 0.625, + "loss_num": 0.03759765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 126355220, + "step": 1908 + }, + { + "epoch": 0.17868675995694294, + "grad_norm": 17.322668075561523, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 126422608, + "step": 1909 + }, + { + "epoch": 0.17868675995694294, + "loss": 1.6921244859695435, + "loss_ce": 0.00755410548299551, + "loss_iou": 0.73828125, + "loss_num": 0.0419921875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 126422608, + "step": 1909 + }, + { + "epoch": 0.17878036224083868, + "grad_norm": 22.217540740966797, + "learning_rate": 5e-05, + "loss": 1.4309, + "num_input_tokens_seen": 126489788, + "step": 1910 + }, + { + "epoch": 0.17878036224083868, + "loss": 1.5370368957519531, + "loss_ce": 0.002857227809727192, + "loss_iou": 0.6640625, + "loss_num": 0.04150390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 126489788, + "step": 1910 + }, + { + "epoch": 0.1788739645247344, + "grad_norm": 42.78955841064453, + "learning_rate": 5e-05, + "loss": 1.607, + "num_input_tokens_seen": 126555532, + "step": 1911 + }, + { + "epoch": 0.1788739645247344, + "loss": 1.694398283958435, + "loss_ce": 0.003968559671193361, + "loss_iou": 0.69921875, + "loss_num": 0.058837890625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 126555532, + "step": 1911 + }, + { + "epoch": 0.17896756680863013, + "grad_norm": 19.902368545532227, + "learning_rate": 5e-05, + "loss": 1.6004, + "num_input_tokens_seen": 126621056, + "step": 1912 + }, + { + "epoch": 0.17896756680863013, + "loss": 1.6208617687225342, + "loss_ce": 0.005627512466162443, + "loss_iou": 0.6953125, + "loss_num": 0.04443359375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 126621056, + "step": 1912 + }, + { + "epoch": 0.17906116909252587, + "grad_norm": 16.653642654418945, + "learning_rate": 5e-05, + "loss": 1.2568, + "num_input_tokens_seen": 126686680, + "step": 1913 + }, + { + "epoch": 0.17906116909252587, + "loss": 1.3326518535614014, + "loss_ce": 0.004526888951659203, + "loss_iou": 0.578125, + "loss_num": 0.033935546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 126686680, + "step": 1913 + }, + { + "epoch": 0.17915477137642158, + "grad_norm": 16.447555541992188, + "learning_rate": 5e-05, + "loss": 1.3708, + "num_input_tokens_seen": 126751944, + "step": 1914 + }, + { + "epoch": 0.17915477137642158, + "loss": 1.5041611194610596, + "loss_ce": 0.0031846007332205772, + "loss_iou": 0.63671875, + "loss_num": 0.045654296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 126751944, + "step": 1914 + }, + { + "epoch": 0.17924837366031732, + "grad_norm": 22.584463119506836, + "learning_rate": 5e-05, + "loss": 1.4771, + "num_input_tokens_seen": 126817988, + "step": 1915 + }, + { + "epoch": 0.17924837366031732, + "loss": 1.708423376083374, + "loss_ce": 0.006274915765970945, + "loss_iou": 0.71484375, + "loss_num": 0.05419921875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 126817988, + "step": 1915 + }, + { + "epoch": 0.17934197594421303, + "grad_norm": 26.603622436523438, + "learning_rate": 5e-05, + "loss": 1.4977, + "num_input_tokens_seen": 126884096, + "step": 1916 + }, + { + "epoch": 0.17934197594421303, + "loss": 1.4041247367858887, + "loss_ce": 0.005198855884373188, + "loss_iou": 0.60546875, + "loss_num": 0.03662109375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 126884096, + "step": 1916 + }, + { + "epoch": 0.17943557822810877, + "grad_norm": 23.41399383544922, + "learning_rate": 5e-05, + "loss": 1.3756, + "num_input_tokens_seen": 126950112, + "step": 1917 + }, + { + "epoch": 0.17943557822810877, + "loss": 1.3776991367340088, + "loss_ce": 0.007581881247460842, + "loss_iou": 0.609375, + "loss_num": 0.0299072265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 126950112, + "step": 1917 + }, + { + "epoch": 0.1795291805120045, + "grad_norm": 16.151899337768555, + "learning_rate": 5e-05, + "loss": 1.6672, + "num_input_tokens_seen": 127016528, + "step": 1918 + }, + { + "epoch": 0.1795291805120045, + "loss": 1.6332756280899048, + "loss_ce": 0.0063225338235497475, + "loss_iou": 0.7265625, + "loss_num": 0.034912109375, + "loss_xval": 1.625, + "num_input_tokens_seen": 127016528, + "step": 1918 + }, + { + "epoch": 0.17962278279590022, + "grad_norm": 43.21495056152344, + "learning_rate": 5e-05, + "loss": 1.3722, + "num_input_tokens_seen": 127082776, + "step": 1919 + }, + { + "epoch": 0.17962278279590022, + "loss": 1.519390344619751, + "loss_ce": 0.004741910845041275, + "loss_iou": 0.63671875, + "loss_num": 0.04833984375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 127082776, + "step": 1919 + }, + { + "epoch": 0.17971638507979595, + "grad_norm": 82.2913589477539, + "learning_rate": 5e-05, + "loss": 1.2503, + "num_input_tokens_seen": 127148604, + "step": 1920 + }, + { + "epoch": 0.17971638507979595, + "loss": 1.1019983291625977, + "loss_ce": 0.0053185950964689255, + "loss_iou": 0.482421875, + "loss_num": 0.0264892578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 127148604, + "step": 1920 + }, + { + "epoch": 0.17980998736369166, + "grad_norm": 17.779369354248047, + "learning_rate": 5e-05, + "loss": 1.259, + "num_input_tokens_seen": 127214744, + "step": 1921 + }, + { + "epoch": 0.17980998736369166, + "loss": 1.3687243461608887, + "loss_ce": 0.004466459155082703, + "loss_iou": 0.5859375, + "loss_num": 0.0380859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 127214744, + "step": 1921 + }, + { + "epoch": 0.1799035896475874, + "grad_norm": 33.66106414794922, + "learning_rate": 5e-05, + "loss": 1.627, + "num_input_tokens_seen": 127281020, + "step": 1922 + }, + { + "epoch": 0.1799035896475874, + "loss": 1.5347261428833008, + "loss_ce": 0.004452696070075035, + "loss_iou": 0.640625, + "loss_num": 0.04931640625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 127281020, + "step": 1922 + }, + { + "epoch": 0.17999719193148314, + "grad_norm": 86.88664245605469, + "learning_rate": 5e-05, + "loss": 1.4684, + "num_input_tokens_seen": 127347132, + "step": 1923 + }, + { + "epoch": 0.17999719193148314, + "loss": 1.442563533782959, + "loss_ce": 0.005551830865442753, + "loss_iou": 0.5859375, + "loss_num": 0.05322265625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 127347132, + "step": 1923 + }, + { + "epoch": 0.18009079421537885, + "grad_norm": 21.12995719909668, + "learning_rate": 5e-05, + "loss": 1.5456, + "num_input_tokens_seen": 127412924, + "step": 1924 + }, + { + "epoch": 0.18009079421537885, + "loss": 1.5041178464889526, + "loss_ce": 0.0026530534960329533, + "loss_iou": 0.60546875, + "loss_num": 0.0576171875, + "loss_xval": 1.5, + "num_input_tokens_seen": 127412924, + "step": 1924 + }, + { + "epoch": 0.1801843964992746, + "grad_norm": 39.95879364013672, + "learning_rate": 5e-05, + "loss": 1.4468, + "num_input_tokens_seen": 127479000, + "step": 1925 + }, + { + "epoch": 0.1801843964992746, + "loss": 1.4673662185668945, + "loss_ce": 0.002522457158192992, + "loss_iou": 0.65625, + "loss_num": 0.0311279296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 127479000, + "step": 1925 + }, + { + "epoch": 0.1802779987831703, + "grad_norm": 24.05704116821289, + "learning_rate": 5e-05, + "loss": 1.6672, + "num_input_tokens_seen": 127543524, + "step": 1926 + }, + { + "epoch": 0.1802779987831703, + "loss": 1.816376805305481, + "loss_ce": 0.003876802045851946, + "loss_iou": 0.7109375, + "loss_num": 0.07763671875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 127543524, + "step": 1926 + }, + { + "epoch": 0.18037160106706604, + "grad_norm": 23.91031837463379, + "learning_rate": 5e-05, + "loss": 1.0682, + "num_input_tokens_seen": 127609152, + "step": 1927 + }, + { + "epoch": 0.18037160106706604, + "loss": 1.0998576879501343, + "loss_ce": 0.004887010902166367, + "loss_iou": 0.380859375, + "loss_num": 0.06640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 127609152, + "step": 1927 + }, + { + "epoch": 0.18046520335096175, + "grad_norm": 17.20271110534668, + "learning_rate": 5e-05, + "loss": 1.2571, + "num_input_tokens_seen": 127676356, + "step": 1928 + }, + { + "epoch": 0.18046520335096175, + "loss": 1.2858045101165771, + "loss_ce": 0.00601929472759366, + "loss_iou": 0.52734375, + "loss_num": 0.04541015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 127676356, + "step": 1928 + }, + { + "epoch": 0.1805588056348575, + "grad_norm": 25.051280975341797, + "learning_rate": 5e-05, + "loss": 1.706, + "num_input_tokens_seen": 127742500, + "step": 1929 + }, + { + "epoch": 0.1805588056348575, + "loss": 1.4824163913726807, + "loss_ce": 0.003412485821172595, + "loss_iou": 0.671875, + "loss_num": 0.0263671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 127742500, + "step": 1929 + }, + { + "epoch": 0.18065240791875323, + "grad_norm": 28.843645095825195, + "learning_rate": 5e-05, + "loss": 1.1059, + "num_input_tokens_seen": 127808460, + "step": 1930 + }, + { + "epoch": 0.18065240791875323, + "loss": 1.1264175176620483, + "loss_ce": 0.0072769299149513245, + "loss_iou": 0.470703125, + "loss_num": 0.035400390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 127808460, + "step": 1930 + }, + { + "epoch": 0.18074601020264894, + "grad_norm": 14.538792610168457, + "learning_rate": 5e-05, + "loss": 1.3473, + "num_input_tokens_seen": 127874696, + "step": 1931 + }, + { + "epoch": 0.18074601020264894, + "loss": 1.285771131515503, + "loss_ce": 0.0054975831881165504, + "loss_iou": 0.54296875, + "loss_num": 0.0390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 127874696, + "step": 1931 + }, + { + "epoch": 0.18083961248654468, + "grad_norm": 43.93027877807617, + "learning_rate": 5e-05, + "loss": 1.3949, + "num_input_tokens_seen": 127940224, + "step": 1932 + }, + { + "epoch": 0.18083961248654468, + "loss": 1.3779126405715942, + "loss_ce": 0.006818891502916813, + "loss_iou": 0.61328125, + "loss_num": 0.0284423828125, + "loss_xval": 1.375, + "num_input_tokens_seen": 127940224, + "step": 1932 + }, + { + "epoch": 0.1809332147704404, + "grad_norm": 19.06087875366211, + "learning_rate": 5e-05, + "loss": 1.6005, + "num_input_tokens_seen": 128006372, + "step": 1933 + }, + { + "epoch": 0.1809332147704404, + "loss": 1.6286779642105103, + "loss_ce": 0.0017248571384698153, + "loss_iou": 0.7265625, + "loss_num": 0.035400390625, + "loss_xval": 1.625, + "num_input_tokens_seen": 128006372, + "step": 1933 + }, + { + "epoch": 0.18102681705433613, + "grad_norm": 13.02169418334961, + "learning_rate": 5e-05, + "loss": 1.5185, + "num_input_tokens_seen": 128071508, + "step": 1934 + }, + { + "epoch": 0.18102681705433613, + "loss": 1.6327862739562988, + "loss_ce": 0.0014387151459231973, + "loss_iou": 0.69140625, + "loss_num": 0.04931640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 128071508, + "step": 1934 + }, + { + "epoch": 0.18112041933823186, + "grad_norm": 24.095348358154297, + "learning_rate": 5e-05, + "loss": 1.3805, + "num_input_tokens_seen": 128138652, + "step": 1935 + }, + { + "epoch": 0.18112041933823186, + "loss": 1.2067148685455322, + "loss_ce": 0.00249119708314538, + "loss_iou": 0.5234375, + "loss_num": 0.031494140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 128138652, + "step": 1935 + }, + { + "epoch": 0.18121402162212757, + "grad_norm": 32.35599899291992, + "learning_rate": 5e-05, + "loss": 1.2376, + "num_input_tokens_seen": 128204448, + "step": 1936 + }, + { + "epoch": 0.18121402162212757, + "loss": 1.2077301740646362, + "loss_ce": 0.006558326072990894, + "loss_iou": 0.4609375, + "loss_num": 0.05615234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 128204448, + "step": 1936 + }, + { + "epoch": 0.1813076239060233, + "grad_norm": 22.592796325683594, + "learning_rate": 5e-05, + "loss": 1.4242, + "num_input_tokens_seen": 128271508, + "step": 1937 + }, + { + "epoch": 0.1813076239060233, + "loss": 1.327092170715332, + "loss_ce": 0.0038501075468957424, + "loss_iou": 0.58203125, + "loss_num": 0.03173828125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 128271508, + "step": 1937 + }, + { + "epoch": 0.18140122618991902, + "grad_norm": 27.72064781188965, + "learning_rate": 5e-05, + "loss": 1.5403, + "num_input_tokens_seen": 128337312, + "step": 1938 + }, + { + "epoch": 0.18140122618991902, + "loss": 1.6330958604812622, + "loss_ce": 0.006142733618617058, + "loss_iou": 0.6875, + "loss_num": 0.051513671875, + "loss_xval": 1.625, + "num_input_tokens_seen": 128337312, + "step": 1938 + }, + { + "epoch": 0.18149482847381476, + "grad_norm": 21.491222381591797, + "learning_rate": 5e-05, + "loss": 1.4238, + "num_input_tokens_seen": 128404084, + "step": 1939 + }, + { + "epoch": 0.18149482847381476, + "loss": 1.4202455282211304, + "loss_ce": 0.0047181760892271996, + "loss_iou": 0.5625, + "loss_num": 0.05908203125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 128404084, + "step": 1939 + }, + { + "epoch": 0.1815884307577105, + "grad_norm": 17.163345336914062, + "learning_rate": 5e-05, + "loss": 1.3903, + "num_input_tokens_seen": 128470460, + "step": 1940 + }, + { + "epoch": 0.1815884307577105, + "loss": 1.354461431503296, + "loss_ce": 0.00827001966536045, + "loss_iou": 0.546875, + "loss_num": 0.05029296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 128470460, + "step": 1940 + }, + { + "epoch": 0.1816820330416062, + "grad_norm": 27.74918556213379, + "learning_rate": 5e-05, + "loss": 1.4035, + "num_input_tokens_seen": 128535812, + "step": 1941 + }, + { + "epoch": 0.1816820330416062, + "loss": 1.4504563808441162, + "loss_ce": 0.0051439437083899975, + "loss_iou": 0.63671875, + "loss_num": 0.03466796875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 128535812, + "step": 1941 + }, + { + "epoch": 0.18177563532550195, + "grad_norm": 24.40339469909668, + "learning_rate": 5e-05, + "loss": 1.7923, + "num_input_tokens_seen": 128601860, + "step": 1942 + }, + { + "epoch": 0.18177563532550195, + "loss": 1.6550748348236084, + "loss_ce": 0.005660798400640488, + "loss_iou": 0.6875, + "loss_num": 0.054443359375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 128601860, + "step": 1942 + }, + { + "epoch": 0.18186923760939766, + "grad_norm": 17.7146053314209, + "learning_rate": 5e-05, + "loss": 1.346, + "num_input_tokens_seen": 128666976, + "step": 1943 + }, + { + "epoch": 0.18186923760939766, + "loss": 1.4426484107971191, + "loss_ce": 0.008078088983893394, + "loss_iou": 0.578125, + "loss_num": 0.054931640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 128666976, + "step": 1943 + }, + { + "epoch": 0.1819628398932934, + "grad_norm": 21.35053062438965, + "learning_rate": 5e-05, + "loss": 1.4457, + "num_input_tokens_seen": 128733712, + "step": 1944 + }, + { + "epoch": 0.1819628398932934, + "loss": 1.3560891151428223, + "loss_ce": 0.0069680060259997845, + "loss_iou": 0.5625, + "loss_num": 0.04541015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 128733712, + "step": 1944 + }, + { + "epoch": 0.18205644217718914, + "grad_norm": 67.17298889160156, + "learning_rate": 5e-05, + "loss": 1.3814, + "num_input_tokens_seen": 128799752, + "step": 1945 + }, + { + "epoch": 0.18205644217718914, + "loss": 1.3860197067260742, + "loss_ce": 0.00564863346517086, + "loss_iou": 0.5859375, + "loss_num": 0.0419921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 128799752, + "step": 1945 + }, + { + "epoch": 0.18215004446108485, + "grad_norm": 17.579940795898438, + "learning_rate": 5e-05, + "loss": 1.4157, + "num_input_tokens_seen": 128865872, + "step": 1946 + }, + { + "epoch": 0.18215004446108485, + "loss": 1.3579998016357422, + "loss_ce": 0.006925633177161217, + "loss_iou": 0.5859375, + "loss_num": 0.0361328125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 128865872, + "step": 1946 + }, + { + "epoch": 0.1822436467449806, + "grad_norm": 20.450754165649414, + "learning_rate": 5e-05, + "loss": 1.374, + "num_input_tokens_seen": 128932280, + "step": 1947 + }, + { + "epoch": 0.1822436467449806, + "loss": 1.4243559837341309, + "loss_ce": 0.006387227214872837, + "loss_iou": 0.5859375, + "loss_num": 0.048828125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 128932280, + "step": 1947 + }, + { + "epoch": 0.1823372490288763, + "grad_norm": 46.6253547668457, + "learning_rate": 5e-05, + "loss": 1.44, + "num_input_tokens_seen": 128997716, + "step": 1948 + }, + { + "epoch": 0.1823372490288763, + "loss": 1.4486358165740967, + "loss_ce": 0.006741163786500692, + "loss_iou": 0.60546875, + "loss_num": 0.045654296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 128997716, + "step": 1948 + }, + { + "epoch": 0.18243085131277204, + "grad_norm": 17.697044372558594, + "learning_rate": 5e-05, + "loss": 2.0093, + "num_input_tokens_seen": 129063060, + "step": 1949 + }, + { + "epoch": 0.18243085131277204, + "loss": 2.1196231842041016, + "loss_ce": 0.006341718602925539, + "loss_iou": 0.84375, + "loss_num": 0.0849609375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 129063060, + "step": 1949 + }, + { + "epoch": 0.18252445359666775, + "grad_norm": 21.53302001953125, + "learning_rate": 5e-05, + "loss": 1.3981, + "num_input_tokens_seen": 129129500, + "step": 1950 + }, + { + "epoch": 0.18252445359666775, + "loss": 1.3187754154205322, + "loss_ce": 0.003345734905451536, + "loss_iou": 0.5703125, + "loss_num": 0.034912109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 129129500, + "step": 1950 + }, + { + "epoch": 0.18261805588056348, + "grad_norm": 19.902812957763672, + "learning_rate": 5e-05, + "loss": 1.4631, + "num_input_tokens_seen": 129194880, + "step": 1951 + }, + { + "epoch": 0.18261805588056348, + "loss": 1.3781917095184326, + "loss_ce": 0.007586177904158831, + "loss_iou": 0.5078125, + "loss_num": 0.0712890625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 129194880, + "step": 1951 + }, + { + "epoch": 0.18271165816445922, + "grad_norm": 23.150081634521484, + "learning_rate": 5e-05, + "loss": 1.2857, + "num_input_tokens_seen": 129260348, + "step": 1952 + }, + { + "epoch": 0.18271165816445922, + "loss": 1.0816574096679688, + "loss_ce": 0.004020635969936848, + "loss_iou": 0.462890625, + "loss_num": 0.030517578125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 129260348, + "step": 1952 + }, + { + "epoch": 0.18280526044835493, + "grad_norm": 31.974945068359375, + "learning_rate": 5e-05, + "loss": 1.7205, + "num_input_tokens_seen": 129326728, + "step": 1953 + }, + { + "epoch": 0.18280526044835493, + "loss": 1.5764834880828857, + "loss_ce": 0.003241327591240406, + "loss_iou": 0.69140625, + "loss_num": 0.037353515625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 129326728, + "step": 1953 + }, + { + "epoch": 0.18289886273225067, + "grad_norm": 18.00637435913086, + "learning_rate": 5e-05, + "loss": 1.4208, + "num_input_tokens_seen": 129392436, + "step": 1954 + }, + { + "epoch": 0.18289886273225067, + "loss": 1.4153504371643066, + "loss_ce": 0.0047058966010808945, + "loss_iou": 0.6171875, + "loss_num": 0.035400390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 129392436, + "step": 1954 + }, + { + "epoch": 0.18299246501614638, + "grad_norm": 20.823671340942383, + "learning_rate": 5e-05, + "loss": 1.3219, + "num_input_tokens_seen": 129458680, + "step": 1955 + }, + { + "epoch": 0.18299246501614638, + "loss": 1.3515501022338867, + "loss_ce": 0.0019407474901527166, + "loss_iou": 0.578125, + "loss_num": 0.0390625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 129458680, + "step": 1955 + }, + { + "epoch": 0.18308606730004212, + "grad_norm": 31.404987335205078, + "learning_rate": 5e-05, + "loss": 1.4798, + "num_input_tokens_seen": 129525600, + "step": 1956 + }, + { + "epoch": 0.18308606730004212, + "loss": 1.5005778074264526, + "loss_ce": 0.00448403786867857, + "loss_iou": 0.61328125, + "loss_num": 0.0546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 129525600, + "step": 1956 + }, + { + "epoch": 0.18317966958393786, + "grad_norm": 18.450042724609375, + "learning_rate": 5e-05, + "loss": 1.4445, + "num_input_tokens_seen": 129591224, + "step": 1957 + }, + { + "epoch": 0.18317966958393786, + "loss": 1.4111019372940063, + "loss_ce": 0.007293255068361759, + "loss_iou": 0.5859375, + "loss_num": 0.045654296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 129591224, + "step": 1957 + }, + { + "epoch": 0.18327327186783357, + "grad_norm": 14.147624969482422, + "learning_rate": 5e-05, + "loss": 1.2483, + "num_input_tokens_seen": 129657140, + "step": 1958 + }, + { + "epoch": 0.18327327186783357, + "loss": 1.2138912677764893, + "loss_ce": 0.0036861575208604336, + "loss_iou": 0.51171875, + "loss_num": 0.037109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 129657140, + "step": 1958 + }, + { + "epoch": 0.1833668741517293, + "grad_norm": 35.48431396484375, + "learning_rate": 5e-05, + "loss": 1.3859, + "num_input_tokens_seen": 129723552, + "step": 1959 + }, + { + "epoch": 0.1833668741517293, + "loss": 1.332700252532959, + "loss_ce": 0.0035985566210001707, + "loss_iou": 0.5703125, + "loss_num": 0.037109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 129723552, + "step": 1959 + }, + { + "epoch": 0.18346047643562502, + "grad_norm": 31.027301788330078, + "learning_rate": 5e-05, + "loss": 1.59, + "num_input_tokens_seen": 129790336, + "step": 1960 + }, + { + "epoch": 0.18346047643562502, + "loss": 1.6845269203186035, + "loss_ce": 0.010210525244474411, + "loss_iou": 0.703125, + "loss_num": 0.054443359375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 129790336, + "step": 1960 + }, + { + "epoch": 0.18355407871952076, + "grad_norm": 16.66594123840332, + "learning_rate": 5e-05, + "loss": 1.3838, + "num_input_tokens_seen": 129857304, + "step": 1961 + }, + { + "epoch": 0.18355407871952076, + "loss": 1.312317132949829, + "loss_ce": 0.00665307929739356, + "loss_iou": 0.58984375, + "loss_num": 0.0244140625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 129857304, + "step": 1961 + }, + { + "epoch": 0.1836476810034165, + "grad_norm": 96.72372436523438, + "learning_rate": 5e-05, + "loss": 1.4029, + "num_input_tokens_seen": 129923064, + "step": 1962 + }, + { + "epoch": 0.1836476810034165, + "loss": 1.5631399154663086, + "loss_ce": 0.004546206444501877, + "loss_iou": 0.62109375, + "loss_num": 0.06298828125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 129923064, + "step": 1962 + }, + { + "epoch": 0.1837412832873122, + "grad_norm": 21.35594367980957, + "learning_rate": 5e-05, + "loss": 1.5687, + "num_input_tokens_seen": 129989276, + "step": 1963 + }, + { + "epoch": 0.1837412832873122, + "loss": 1.50491464138031, + "loss_ce": 0.0019849720411002636, + "loss_iou": 0.62109375, + "loss_num": 0.05224609375, + "loss_xval": 1.5, + "num_input_tokens_seen": 129989276, + "step": 1963 + }, + { + "epoch": 0.18383488557120795, + "grad_norm": 25.7547664642334, + "learning_rate": 5e-05, + "loss": 1.499, + "num_input_tokens_seen": 130055708, + "step": 1964 + }, + { + "epoch": 0.18383488557120795, + "loss": 1.4371271133422852, + "loss_ce": 0.006463091820478439, + "loss_iou": 0.55859375, + "loss_num": 0.06298828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 130055708, + "step": 1964 + }, + { + "epoch": 0.18392848785510366, + "grad_norm": 21.432514190673828, + "learning_rate": 5e-05, + "loss": 1.5024, + "num_input_tokens_seen": 130122920, + "step": 1965 + }, + { + "epoch": 0.18392848785510366, + "loss": 1.428673505783081, + "loss_ce": 0.0038687740452587605, + "loss_iou": 0.640625, + "loss_num": 0.029052734375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 130122920, + "step": 1965 + }, + { + "epoch": 0.1840220901389994, + "grad_norm": 39.52536392211914, + "learning_rate": 5e-05, + "loss": 1.3123, + "num_input_tokens_seen": 130189624, + "step": 1966 + }, + { + "epoch": 0.1840220901389994, + "loss": 1.4153207540512085, + "loss_ce": 0.004187947139143944, + "loss_iou": 0.61328125, + "loss_num": 0.036865234375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 130189624, + "step": 1966 + }, + { + "epoch": 0.1841156924228951, + "grad_norm": 26.08038330078125, + "learning_rate": 5e-05, + "loss": 1.4598, + "num_input_tokens_seen": 130255684, + "step": 1967 + }, + { + "epoch": 0.1841156924228951, + "loss": 1.202392816543579, + "loss_ce": 0.0043948134407401085, + "loss_iou": 0.51171875, + "loss_num": 0.034423828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 130255684, + "step": 1967 + }, + { + "epoch": 0.18420929470679084, + "grad_norm": 36.27613067626953, + "learning_rate": 5e-05, + "loss": 1.3115, + "num_input_tokens_seen": 130321496, + "step": 1968 + }, + { + "epoch": 0.18420929470679084, + "loss": 1.3748427629470825, + "loss_ce": 0.004237297922372818, + "loss_iou": 0.5703125, + "loss_num": 0.04638671875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 130321496, + "step": 1968 + }, + { + "epoch": 0.18430289699068658, + "grad_norm": 27.272716522216797, + "learning_rate": 5e-05, + "loss": 1.4727, + "num_input_tokens_seen": 130387560, + "step": 1969 + }, + { + "epoch": 0.18430289699068658, + "loss": 1.5415709018707275, + "loss_ce": 0.006414575502276421, + "loss_iou": 0.67578125, + "loss_num": 0.037109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 130387560, + "step": 1969 + }, + { + "epoch": 0.1843964992745823, + "grad_norm": 30.705875396728516, + "learning_rate": 5e-05, + "loss": 1.4431, + "num_input_tokens_seen": 130453196, + "step": 1970 + }, + { + "epoch": 0.1843964992745823, + "loss": 1.5475211143493652, + "loss_ce": 0.0045523312874138355, + "loss_iou": 0.6875, + "loss_num": 0.034423828125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 130453196, + "step": 1970 + }, + { + "epoch": 0.18449010155847803, + "grad_norm": 18.834285736083984, + "learning_rate": 5e-05, + "loss": 1.5848, + "num_input_tokens_seen": 130519368, + "step": 1971 + }, + { + "epoch": 0.18449010155847803, + "loss": 1.6733832359313965, + "loss_ce": 0.006391044706106186, + "loss_iou": 0.71484375, + "loss_num": 0.04736328125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 130519368, + "step": 1971 + }, + { + "epoch": 0.18458370384237374, + "grad_norm": 14.840357780456543, + "learning_rate": 5e-05, + "loss": 1.3213, + "num_input_tokens_seen": 130584576, + "step": 1972 + }, + { + "epoch": 0.18458370384237374, + "loss": 1.263106346130371, + "loss_ce": 0.007735134102404118, + "loss_iou": 0.5234375, + "loss_num": 0.041748046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 130584576, + "step": 1972 + }, + { + "epoch": 0.18467730612626948, + "grad_norm": 13.746054649353027, + "learning_rate": 5e-05, + "loss": 1.2833, + "num_input_tokens_seen": 130650776, + "step": 1973 + }, + { + "epoch": 0.18467730612626948, + "loss": 1.2599341869354248, + "loss_ce": 0.0021216641180217266, + "loss_iou": 0.5390625, + "loss_num": 0.0361328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 130650776, + "step": 1973 + }, + { + "epoch": 0.18477090841016522, + "grad_norm": 14.861371994018555, + "learning_rate": 5e-05, + "loss": 1.1308, + "num_input_tokens_seen": 130716800, + "step": 1974 + }, + { + "epoch": 0.18477090841016522, + "loss": 1.0687583684921265, + "loss_ce": 0.005525918677449226, + "loss_iou": 0.45703125, + "loss_num": 0.030029296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 130716800, + "step": 1974 + }, + { + "epoch": 0.18486451069406093, + "grad_norm": 17.192245483398438, + "learning_rate": 5e-05, + "loss": 1.3216, + "num_input_tokens_seen": 130782332, + "step": 1975 + }, + { + "epoch": 0.18486451069406093, + "loss": 1.2852097749710083, + "loss_ce": 0.0061570508405566216, + "loss_iou": 0.54296875, + "loss_num": 0.03857421875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 130782332, + "step": 1975 + }, + { + "epoch": 0.18495811297795667, + "grad_norm": 25.762100219726562, + "learning_rate": 5e-05, + "loss": 1.3482, + "num_input_tokens_seen": 130847928, + "step": 1976 + }, + { + "epoch": 0.18495811297795667, + "loss": 1.269754409790039, + "loss_ce": 0.0016880759503692389, + "loss_iou": 0.57421875, + "loss_num": 0.024658203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 130847928, + "step": 1976 + }, + { + "epoch": 0.18505171526185238, + "grad_norm": 119.86146545410156, + "learning_rate": 5e-05, + "loss": 1.7387, + "num_input_tokens_seen": 130914068, + "step": 1977 + }, + { + "epoch": 0.18505171526185238, + "loss": 2.0166561603546143, + "loss_ce": 0.002984270453453064, + "loss_iou": 0.8125, + "loss_num": 0.0771484375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 130914068, + "step": 1977 + }, + { + "epoch": 0.18514531754574812, + "grad_norm": 12.953497886657715, + "learning_rate": 5e-05, + "loss": 1.3247, + "num_input_tokens_seen": 130980532, + "step": 1978 + }, + { + "epoch": 0.18514531754574812, + "loss": 1.5354790687561035, + "loss_ce": 0.004229026846587658, + "loss_iou": 0.62890625, + "loss_num": 0.055419921875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 130980532, + "step": 1978 + }, + { + "epoch": 0.18523891982964386, + "grad_norm": 17.847932815551758, + "learning_rate": 5e-05, + "loss": 1.2552, + "num_input_tokens_seen": 131043352, + "step": 1979 + }, + { + "epoch": 0.18523891982964386, + "loss": 1.4122639894485474, + "loss_ce": 0.004549109376966953, + "loss_iou": 0.5390625, + "loss_num": 0.06689453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 131043352, + "step": 1979 + }, + { + "epoch": 0.18533252211353957, + "grad_norm": 18.029878616333008, + "learning_rate": 5e-05, + "loss": 1.5715, + "num_input_tokens_seen": 131109000, + "step": 1980 + }, + { + "epoch": 0.18533252211353957, + "loss": 1.5618164539337158, + "loss_ce": 0.012011634185910225, + "loss_iou": 0.59765625, + "loss_num": 0.0703125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 131109000, + "step": 1980 + }, + { + "epoch": 0.1854261243974353, + "grad_norm": 39.68682098388672, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 131175640, + "step": 1981 + }, + { + "epoch": 0.1854261243974353, + "loss": 1.1519880294799805, + "loss_ce": 0.006968412082642317, + "loss_iou": 0.484375, + "loss_num": 0.034912109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 131175640, + "step": 1981 + }, + { + "epoch": 0.18551972668133102, + "grad_norm": 16.826982498168945, + "learning_rate": 5e-05, + "loss": 1.2313, + "num_input_tokens_seen": 131242008, + "step": 1982 + }, + { + "epoch": 0.18551972668133102, + "loss": 1.4770022630691528, + "loss_ce": 0.007763974368572235, + "loss_iou": 0.58984375, + "loss_num": 0.0576171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 131242008, + "step": 1982 + }, + { + "epoch": 0.18561332896522675, + "grad_norm": 43.08837890625, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 131308212, + "step": 1983 + }, + { + "epoch": 0.18561332896522675, + "loss": 1.6214125156402588, + "loss_ce": 0.005201567430049181, + "loss_iou": 0.6484375, + "loss_num": 0.0634765625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 131308212, + "step": 1983 + }, + { + "epoch": 0.1857069312491225, + "grad_norm": 18.7119083404541, + "learning_rate": 5e-05, + "loss": 1.6566, + "num_input_tokens_seen": 131376164, + "step": 1984 + }, + { + "epoch": 0.1857069312491225, + "loss": 1.633873462677002, + "loss_ce": 0.00399056589230895, + "loss_iou": 0.6640625, + "loss_num": 0.06103515625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 131376164, + "step": 1984 + }, + { + "epoch": 0.1858005335330182, + "grad_norm": 66.57056427001953, + "learning_rate": 5e-05, + "loss": 1.3089, + "num_input_tokens_seen": 131442472, + "step": 1985 + }, + { + "epoch": 0.1858005335330182, + "loss": 1.2516813278198242, + "loss_ce": 0.0038785517681390047, + "loss_iou": 0.478515625, + "loss_num": 0.05810546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 131442472, + "step": 1985 + }, + { + "epoch": 0.18589413581691394, + "grad_norm": 30.466259002685547, + "learning_rate": 5e-05, + "loss": 1.4092, + "num_input_tokens_seen": 131508660, + "step": 1986 + }, + { + "epoch": 0.18589413581691394, + "loss": 1.365430474281311, + "loss_ce": 0.002637461293488741, + "loss_iou": 0.55078125, + "loss_num": 0.0517578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 131508660, + "step": 1986 + }, + { + "epoch": 0.18598773810080965, + "grad_norm": 19.79939842224121, + "learning_rate": 5e-05, + "loss": 1.703, + "num_input_tokens_seen": 131575048, + "step": 1987 + }, + { + "epoch": 0.18598773810080965, + "loss": 1.8632733821868896, + "loss_ce": 0.004386810585856438, + "loss_iou": 0.71875, + "loss_num": 0.083984375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 131575048, + "step": 1987 + }, + { + "epoch": 0.1860813403847054, + "grad_norm": 13.872488975524902, + "learning_rate": 5e-05, + "loss": 1.275, + "num_input_tokens_seen": 131641564, + "step": 1988 + }, + { + "epoch": 0.1860813403847054, + "loss": 1.226672649383545, + "loss_ce": 0.006945985835045576, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 131641564, + "step": 1988 + }, + { + "epoch": 0.1861749426686011, + "grad_norm": 24.429122924804688, + "learning_rate": 5e-05, + "loss": 1.1989, + "num_input_tokens_seen": 131705996, + "step": 1989 + }, + { + "epoch": 0.1861749426686011, + "loss": 1.265514850616455, + "loss_ce": 0.003796099917963147, + "loss_iou": 0.48828125, + "loss_num": 0.056884765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 131705996, + "step": 1989 + }, + { + "epoch": 0.18626854495249684, + "grad_norm": 16.61542320251465, + "learning_rate": 5e-05, + "loss": 1.2852, + "num_input_tokens_seen": 131771648, + "step": 1990 + }, + { + "epoch": 0.18626854495249684, + "loss": 1.1972167491912842, + "loss_ce": 0.004833961371332407, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 131771648, + "step": 1990 + }, + { + "epoch": 0.18636214723639258, + "grad_norm": 20.98232650756836, + "learning_rate": 5e-05, + "loss": 1.3515, + "num_input_tokens_seen": 131838160, + "step": 1991 + }, + { + "epoch": 0.18636214723639258, + "loss": 1.2970855236053467, + "loss_ce": 0.004116716329008341, + "loss_iou": 0.55078125, + "loss_num": 0.037841796875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 131838160, + "step": 1991 + }, + { + "epoch": 0.1864557495202883, + "grad_norm": 20.897262573242188, + "learning_rate": 5e-05, + "loss": 1.5175, + "num_input_tokens_seen": 131903900, + "step": 1992 + }, + { + "epoch": 0.1864557495202883, + "loss": 1.455235481262207, + "loss_ce": 0.005040129646658897, + "loss_iou": 0.62109375, + "loss_num": 0.041748046875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 131903900, + "step": 1992 + }, + { + "epoch": 0.18654935180418403, + "grad_norm": 26.697927474975586, + "learning_rate": 5e-05, + "loss": 1.4688, + "num_input_tokens_seen": 131970484, + "step": 1993 + }, + { + "epoch": 0.18654935180418403, + "loss": 1.4565171003341675, + "loss_ce": 0.011692902073264122, + "loss_iou": 0.58203125, + "loss_num": 0.055908203125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 131970484, + "step": 1993 + }, + { + "epoch": 0.18664295408807974, + "grad_norm": 16.57448387145996, + "learning_rate": 5e-05, + "loss": 1.504, + "num_input_tokens_seen": 132036380, + "step": 1994 + }, + { + "epoch": 0.18664295408807974, + "loss": 1.5036506652832031, + "loss_ce": 0.005603712517768145, + "loss_iou": 0.66015625, + "loss_num": 0.0361328125, + "loss_xval": 1.5, + "num_input_tokens_seen": 132036380, + "step": 1994 + }, + { + "epoch": 0.18673655637197548, + "grad_norm": 13.335344314575195, + "learning_rate": 5e-05, + "loss": 1.5174, + "num_input_tokens_seen": 132101980, + "step": 1995 + }, + { + "epoch": 0.18673655637197548, + "loss": 1.5372298955917358, + "loss_ce": 0.004515079781413078, + "loss_iou": 0.60546875, + "loss_num": 0.06396484375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 132101980, + "step": 1995 + }, + { + "epoch": 0.18683015865587121, + "grad_norm": 13.998605728149414, + "learning_rate": 5e-05, + "loss": 1.2338, + "num_input_tokens_seen": 132167712, + "step": 1996 + }, + { + "epoch": 0.18683015865587121, + "loss": 1.4663426876068115, + "loss_ce": 0.004916940815746784, + "loss_iou": 0.6015625, + "loss_num": 0.052001953125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 132167712, + "step": 1996 + }, + { + "epoch": 0.18692376093976693, + "grad_norm": 34.171142578125, + "learning_rate": 5e-05, + "loss": 1.2822, + "num_input_tokens_seen": 132233348, + "step": 1997 + }, + { + "epoch": 0.18692376093976693, + "loss": 1.3784549236297607, + "loss_ce": 0.005408023484051228, + "loss_iou": 0.59765625, + "loss_num": 0.035888671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 132233348, + "step": 1997 + }, + { + "epoch": 0.18701736322366266, + "grad_norm": 42.80735397338867, + "learning_rate": 5e-05, + "loss": 1.6852, + "num_input_tokens_seen": 132299644, + "step": 1998 + }, + { + "epoch": 0.18701736322366266, + "loss": 1.6969330310821533, + "loss_ce": 0.006503298878669739, + "loss_iou": 0.69921875, + "loss_num": 0.057861328125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 132299644, + "step": 1998 + }, + { + "epoch": 0.18711096550755837, + "grad_norm": 20.92772674560547, + "learning_rate": 5e-05, + "loss": 1.203, + "num_input_tokens_seen": 132366452, + "step": 1999 + }, + { + "epoch": 0.18711096550755837, + "loss": 1.2389694452285767, + "loss_ce": 0.009355181828141212, + "loss_iou": 0.50390625, + "loss_num": 0.044189453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 132366452, + "step": 1999 + }, + { + "epoch": 0.1872045677914541, + "grad_norm": 30.069229125976562, + "learning_rate": 5e-05, + "loss": 1.5779, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "eval_seeclick_CIoU": 0.11575577221810818, + "eval_seeclick_GIoU": 0.1071876734495163, + "eval_seeclick_IoU": 0.23197105526924133, + "eval_seeclick_MAE_all": 0.13012679666280746, + "eval_seeclick_MAE_h": 0.06467602401971817, + "eval_seeclick_MAE_w": 0.10543861240148544, + "eval_seeclick_MAE_x_boxes": 0.2087840437889099, + "eval_seeclick_MAE_y_boxes": 0.10963879711925983, + "eval_seeclick_NUM_probability": 0.9996125400066376, + "eval_seeclick_inside_bbox": 0.2760416716337204, + "eval_seeclick_loss": 2.5171961784362793, + "eval_seeclick_loss_ce": 0.013720860704779625, + "eval_seeclick_loss_iou": 0.9281005859375, + "eval_seeclick_loss_num": 0.12964630126953125, + "eval_seeclick_loss_xval": 2.505859375, + "eval_seeclick_runtime": 69.1578, + "eval_seeclick_samples_per_second": 0.68, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "eval_icons_CIoU": -0.06847554817795753, + "eval_icons_GIoU": -0.0004963139072060585, + "eval_icons_IoU": 0.11401020735502243, + "eval_icons_MAE_all": 0.172628253698349, + "eval_icons_MAE_h": 0.24317608773708344, + "eval_icons_MAE_w": 0.13153423741459846, + "eval_icons_MAE_x_boxes": 0.1154375858604908, + "eval_icons_MAE_y_boxes": 0.0758729837834835, + "eval_icons_NUM_probability": 0.9998328685760498, + "eval_icons_inside_bbox": 0.1770833358168602, + "eval_icons_loss": 2.8988170623779297, + "eval_icons_loss_ce": 6.705435225740075e-05, + "eval_icons_loss_iou": 1.011962890625, + "eval_icons_loss_num": 0.18157958984375, + "eval_icons_loss_xval": 2.931640625, + "eval_icons_runtime": 65.7634, + "eval_icons_samples_per_second": 0.76, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "eval_screenspot_CIoU": -0.03418503267069658, + "eval_screenspot_GIoU": -0.018533222377300262, + "eval_screenspot_IoU": 0.15474786361058554, + "eval_screenspot_MAE_all": 0.22027545173962912, + "eval_screenspot_MAE_h": 0.23204520344734192, + "eval_screenspot_MAE_w": 0.19964665671189627, + "eval_screenspot_MAE_x_boxes": 0.26087622344493866, + "eval_screenspot_MAE_y_boxes": 0.11870437612136205, + "eval_screenspot_NUM_probability": 0.999854306379954, + "eval_screenspot_inside_bbox": 0.3149999976158142, + "eval_screenspot_loss": 3.1787924766540527, + "eval_screenspot_loss_ce": 0.010288806942602, + "eval_screenspot_loss_iou": 1.0364583333333333, + "eval_screenspot_loss_num": 0.22825113932291666, + "eval_screenspot_loss_xval": 3.21484375, + "eval_screenspot_runtime": 110.6768, + "eval_screenspot_samples_per_second": 0.804, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "eval_compot_CIoU": -0.08787180855870247, + "eval_compot_GIoU": -0.045600379817187786, + "eval_compot_IoU": 0.09643351286649704, + "eval_compot_MAE_all": 0.20011915266513824, + "eval_compot_MAE_h": 0.1415417194366455, + "eval_compot_MAE_w": 0.21793979406356812, + "eval_compot_MAE_x_boxes": 0.20072585344314575, + "eval_compot_MAE_y_boxes": 0.09426753595471382, + "eval_compot_NUM_probability": 0.9998772144317627, + "eval_compot_inside_bbox": 0.2170138955116272, + "eval_compot_loss": 3.152869939804077, + "eval_compot_loss_ce": 0.002876733778975904, + "eval_compot_loss_iou": 1.078125, + "eval_compot_loss_num": 0.204376220703125, + "eval_compot_loss_xval": 3.177734375, + "eval_compot_runtime": 77.9382, + "eval_compot_samples_per_second": 0.642, + "eval_compot_steps_per_second": 0.026, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "eval_custom_ui_MAE_all": 0.15138039737939835, + "eval_custom_ui_MAE_x": 0.1302195005118847, + "eval_custom_ui_MAE_y": 0.1725413054227829, + "eval_custom_ui_NUM_probability": 0.9990260899066925, + "eval_custom_ui_loss": 0.8326959013938904, + "eval_custom_ui_loss_ce": 0.13630567491054535, + "eval_custom_ui_loss_num": 0.149688720703125, + "eval_custom_ui_loss_xval": 0.7489013671875, + "eval_custom_ui_runtime": 51.0925, + "eval_custom_ui_samples_per_second": 0.979, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.1872045677914541, + "loss": 0.9159218072891235, + "loss_ce": 0.14614643156528473, + "loss_iou": 0.0, + "loss_num": 0.154296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 132432716, + "step": 2000 + }, + { + "epoch": 0.18729817007534985, + "grad_norm": 36.8077392578125, + "learning_rate": 5e-05, + "loss": 1.5314, + "num_input_tokens_seen": 132499200, + "step": 2001 + }, + { + "epoch": 0.18729817007534985, + "loss": 1.541508436203003, + "loss_ce": 0.0053756022825837135, + "loss_iou": 0.640625, + "loss_num": 0.051513671875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 132499200, + "step": 2001 + }, + { + "epoch": 0.18739177235924556, + "grad_norm": 22.555620193481445, + "learning_rate": 5e-05, + "loss": 1.4048, + "num_input_tokens_seen": 132566316, + "step": 2002 + }, + { + "epoch": 0.18739177235924556, + "loss": 1.3378509283065796, + "loss_ce": 0.0019134902395308018, + "loss_iou": 0.55859375, + "loss_num": 0.043701171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 132566316, + "step": 2002 + }, + { + "epoch": 0.1874853746431413, + "grad_norm": 18.991727828979492, + "learning_rate": 5e-05, + "loss": 1.2057, + "num_input_tokens_seen": 132631852, + "step": 2003 + }, + { + "epoch": 0.1874853746431413, + "loss": 1.2083985805511475, + "loss_ce": 0.00612806249409914, + "loss_iou": 0.498046875, + "loss_num": 0.04150390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 132631852, + "step": 2003 + }, + { + "epoch": 0.187578976927037, + "grad_norm": 26.7177677154541, + "learning_rate": 5e-05, + "loss": 1.5627, + "num_input_tokens_seen": 132697524, + "step": 2004 + }, + { + "epoch": 0.187578976927037, + "loss": 1.4155142307281494, + "loss_ce": 0.006334573030471802, + "loss_iou": 0.59375, + "loss_num": 0.04345703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 132697524, + "step": 2004 + }, + { + "epoch": 0.18767257921093275, + "grad_norm": 31.37240982055664, + "learning_rate": 5e-05, + "loss": 1.5654, + "num_input_tokens_seen": 132762980, + "step": 2005 + }, + { + "epoch": 0.18767257921093275, + "loss": 1.430168628692627, + "loss_ce": 0.0034108958207070827, + "loss_iou": 0.640625, + "loss_num": 0.029541015625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 132762980, + "step": 2005 + }, + { + "epoch": 0.18776618149482846, + "grad_norm": 22.789878845214844, + "learning_rate": 5e-05, + "loss": 1.5458, + "num_input_tokens_seen": 132828428, + "step": 2006 + }, + { + "epoch": 0.18776618149482846, + "loss": 1.5693515539169312, + "loss_ce": 0.005386617965996265, + "loss_iou": 0.6875, + "loss_num": 0.037109375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 132828428, + "step": 2006 + }, + { + "epoch": 0.1878597837787242, + "grad_norm": 23.16802406311035, + "learning_rate": 5e-05, + "loss": 1.3025, + "num_input_tokens_seen": 132894728, + "step": 2007 + }, + { + "epoch": 0.1878597837787242, + "loss": 1.2459523677825928, + "loss_ce": 0.004741411656141281, + "loss_iou": 0.546875, + "loss_num": 0.029541015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 132894728, + "step": 2007 + }, + { + "epoch": 0.18795338606261994, + "grad_norm": 18.789785385131836, + "learning_rate": 5e-05, + "loss": 1.3813, + "num_input_tokens_seen": 132961028, + "step": 2008 + }, + { + "epoch": 0.18795338606261994, + "loss": 1.3989194631576538, + "loss_ce": 0.007317899260669947, + "loss_iou": 0.6171875, + "loss_num": 0.03125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 132961028, + "step": 2008 + }, + { + "epoch": 0.18804698834651565, + "grad_norm": 36.02462387084961, + "learning_rate": 5e-05, + "loss": 1.5948, + "num_input_tokens_seen": 133027816, + "step": 2009 + }, + { + "epoch": 0.18804698834651565, + "loss": 1.7780526876449585, + "loss_ce": 0.011451136320829391, + "loss_iou": 0.703125, + "loss_num": 0.0732421875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 133027816, + "step": 2009 + }, + { + "epoch": 0.1881405906304114, + "grad_norm": 17.939682006835938, + "learning_rate": 5e-05, + "loss": 1.4303, + "num_input_tokens_seen": 133093232, + "step": 2010 + }, + { + "epoch": 0.1881405906304114, + "loss": 1.3121365308761597, + "loss_ce": 0.008486653678119183, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 133093232, + "step": 2010 + }, + { + "epoch": 0.1882341929143071, + "grad_norm": 19.468914031982422, + "learning_rate": 5e-05, + "loss": 1.3176, + "num_input_tokens_seen": 133159060, + "step": 2011 + }, + { + "epoch": 0.1882341929143071, + "loss": 1.310737133026123, + "loss_ce": 0.004096532706171274, + "loss_iou": 0.55859375, + "loss_num": 0.038330078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 133159060, + "step": 2011 + }, + { + "epoch": 0.18832779519820284, + "grad_norm": 26.613019943237305, + "learning_rate": 5e-05, + "loss": 1.4038, + "num_input_tokens_seen": 133226052, + "step": 2012 + }, + { + "epoch": 0.18832779519820284, + "loss": 1.391554355621338, + "loss_ce": 0.00971841812133789, + "loss_iou": 0.5703125, + "loss_num": 0.048095703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 133226052, + "step": 2012 + }, + { + "epoch": 0.18842139748209857, + "grad_norm": 22.770835876464844, + "learning_rate": 5e-05, + "loss": 1.4995, + "num_input_tokens_seen": 133292700, + "step": 2013 + }, + { + "epoch": 0.18842139748209857, + "loss": 1.5055845975875854, + "loss_ce": 0.007049466483294964, + "loss_iou": 0.6171875, + "loss_num": 0.052490234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 133292700, + "step": 2013 + }, + { + "epoch": 0.18851499976599428, + "grad_norm": 31.13936424255371, + "learning_rate": 5e-05, + "loss": 1.3374, + "num_input_tokens_seen": 133358588, + "step": 2014 + }, + { + "epoch": 0.18851499976599428, + "loss": 1.3625235557556152, + "loss_ce": 0.006780206225812435, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 133358588, + "step": 2014 + }, + { + "epoch": 0.18860860204989002, + "grad_norm": 21.765378952026367, + "learning_rate": 5e-05, + "loss": 1.6432, + "num_input_tokens_seen": 133424892, + "step": 2015 + }, + { + "epoch": 0.18860860204989002, + "loss": 1.6781728267669678, + "loss_ce": 0.0044667646288871765, + "loss_iou": 0.67578125, + "loss_num": 0.0654296875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 133424892, + "step": 2015 + }, + { + "epoch": 0.18870220433378573, + "grad_norm": 21.015169143676758, + "learning_rate": 5e-05, + "loss": 1.4138, + "num_input_tokens_seen": 133490260, + "step": 2016 + }, + { + "epoch": 0.18870220433378573, + "loss": 1.2584657669067383, + "loss_ce": 0.0017519108951091766, + "loss_iou": 0.498046875, + "loss_num": 0.05224609375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 133490260, + "step": 2016 + }, + { + "epoch": 0.18879580661768147, + "grad_norm": 45.764705657958984, + "learning_rate": 5e-05, + "loss": 1.6168, + "num_input_tokens_seen": 133557864, + "step": 2017 + }, + { + "epoch": 0.18879580661768147, + "loss": 1.614818811416626, + "loss_ce": 0.0015376550145447254, + "loss_iou": 0.7265625, + "loss_num": 0.03125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 133557864, + "step": 2017 + }, + { + "epoch": 0.1888894089015772, + "grad_norm": 20.390649795532227, + "learning_rate": 5e-05, + "loss": 1.7049, + "num_input_tokens_seen": 133624080, + "step": 2018 + }, + { + "epoch": 0.1888894089015772, + "loss": 1.7868666648864746, + "loss_ce": 0.0036635464057326317, + "loss_iou": 0.73828125, + "loss_num": 0.061767578125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 133624080, + "step": 2018 + }, + { + "epoch": 0.18898301118547292, + "grad_norm": 23.811748504638672, + "learning_rate": 5e-05, + "loss": 1.4238, + "num_input_tokens_seen": 133691348, + "step": 2019 + }, + { + "epoch": 0.18898301118547292, + "loss": 1.4709348678588867, + "loss_ce": 0.005602835211902857, + "loss_iou": 0.64453125, + "loss_num": 0.035400390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 133691348, + "step": 2019 + }, + { + "epoch": 0.18907661346936866, + "grad_norm": 30.91826629638672, + "learning_rate": 5e-05, + "loss": 1.3329, + "num_input_tokens_seen": 133758316, + "step": 2020 + }, + { + "epoch": 0.18907661346936866, + "loss": 1.3241961002349854, + "loss_ce": 0.0019304631277918816, + "loss_iou": 0.5859375, + "loss_num": 0.0303955078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 133758316, + "step": 2020 + }, + { + "epoch": 0.18917021575326437, + "grad_norm": 20.472759246826172, + "learning_rate": 5e-05, + "loss": 1.7706, + "num_input_tokens_seen": 133826016, + "step": 2021 + }, + { + "epoch": 0.18917021575326437, + "loss": 1.650133728981018, + "loss_ce": 0.004625923000276089, + "loss_iou": 0.70703125, + "loss_num": 0.046142578125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 133826016, + "step": 2021 + }, + { + "epoch": 0.1892638180371601, + "grad_norm": 28.499732971191406, + "learning_rate": 5e-05, + "loss": 1.4081, + "num_input_tokens_seen": 133892244, + "step": 2022 + }, + { + "epoch": 0.1892638180371601, + "loss": 1.5334014892578125, + "loss_ce": 0.004104674328118563, + "loss_iou": 0.640625, + "loss_num": 0.050048828125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 133892244, + "step": 2022 + }, + { + "epoch": 0.18935742032105585, + "grad_norm": 43.2701301574707, + "learning_rate": 5e-05, + "loss": 1.3486, + "num_input_tokens_seen": 133957804, + "step": 2023 + }, + { + "epoch": 0.18935742032105585, + "loss": 1.3434921503067017, + "loss_ce": 0.003160080872476101, + "loss_iou": 0.5859375, + "loss_num": 0.034423828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 133957804, + "step": 2023 + }, + { + "epoch": 0.18945102260495156, + "grad_norm": 24.83196258544922, + "learning_rate": 5e-05, + "loss": 1.4019, + "num_input_tokens_seen": 134024396, + "step": 2024 + }, + { + "epoch": 0.18945102260495156, + "loss": 1.3687567710876465, + "loss_ce": 0.0015692950692027807, + "loss_iou": 0.59375, + "loss_num": 0.035400390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 134024396, + "step": 2024 + }, + { + "epoch": 0.1895446248888473, + "grad_norm": 31.510297775268555, + "learning_rate": 5e-05, + "loss": 1.4844, + "num_input_tokens_seen": 134091252, + "step": 2025 + }, + { + "epoch": 0.1895446248888473, + "loss": 1.485026240348816, + "loss_ce": 0.006510650739073753, + "loss_iou": 0.625, + "loss_num": 0.044677734375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 134091252, + "step": 2025 + }, + { + "epoch": 0.189638227172743, + "grad_norm": 15.40998649597168, + "learning_rate": 5e-05, + "loss": 1.4777, + "num_input_tokens_seen": 134158108, + "step": 2026 + }, + { + "epoch": 0.189638227172743, + "loss": 0.9744458198547363, + "loss_ce": 0.008137264288961887, + "loss_iou": 0.416015625, + "loss_num": 0.0264892578125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 134158108, + "step": 2026 + }, + { + "epoch": 0.18973182945663875, + "grad_norm": 79.20665740966797, + "learning_rate": 5e-05, + "loss": 1.4997, + "num_input_tokens_seen": 134225672, + "step": 2027 + }, + { + "epoch": 0.18973182945663875, + "loss": 1.300979495048523, + "loss_ce": 0.0021513812243938446, + "loss_iou": 0.5703125, + "loss_num": 0.03173828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 134225672, + "step": 2027 + }, + { + "epoch": 0.18982543174053446, + "grad_norm": 22.0035457611084, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 134291564, + "step": 2028 + }, + { + "epoch": 0.18982543174053446, + "loss": 1.1726512908935547, + "loss_ce": 0.008924500085413456, + "loss_iou": 0.494140625, + "loss_num": 0.034912109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 134291564, + "step": 2028 + }, + { + "epoch": 0.1899190340244302, + "grad_norm": 24.686311721801758, + "learning_rate": 5e-05, + "loss": 1.3878, + "num_input_tokens_seen": 134357992, + "step": 2029 + }, + { + "epoch": 0.1899190340244302, + "loss": 1.517027497291565, + "loss_ce": 0.0033556390553712845, + "loss_iou": 0.65625, + "loss_num": 0.041015625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 134357992, + "step": 2029 + }, + { + "epoch": 0.19001263630832593, + "grad_norm": 35.2572135925293, + "learning_rate": 5e-05, + "loss": 1.5802, + "num_input_tokens_seen": 134424524, + "step": 2030 + }, + { + "epoch": 0.19001263630832593, + "loss": 1.7430429458618164, + "loss_ce": 0.003785067005082965, + "loss_iou": 0.74609375, + "loss_num": 0.048828125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 134424524, + "step": 2030 + }, + { + "epoch": 0.19010623859222164, + "grad_norm": 22.11330795288086, + "learning_rate": 5e-05, + "loss": 1.5555, + "num_input_tokens_seen": 134489824, + "step": 2031 + }, + { + "epoch": 0.19010623859222164, + "loss": 1.5073288679122925, + "loss_ce": 0.0014695466961711645, + "loss_iou": 0.69140625, + "loss_num": 0.024169921875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 134489824, + "step": 2031 + }, + { + "epoch": 0.19019984087611738, + "grad_norm": 24.93625831604004, + "learning_rate": 5e-05, + "loss": 1.4622, + "num_input_tokens_seen": 134556340, + "step": 2032 + }, + { + "epoch": 0.19019984087611738, + "loss": 1.3464317321777344, + "loss_ce": 0.003658217377960682, + "loss_iou": 0.55859375, + "loss_num": 0.044921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 134556340, + "step": 2032 + }, + { + "epoch": 0.1902934431600131, + "grad_norm": 19.162181854248047, + "learning_rate": 5e-05, + "loss": 1.3348, + "num_input_tokens_seen": 134622728, + "step": 2033 + }, + { + "epoch": 0.1902934431600131, + "loss": 1.2705814838409424, + "loss_ce": 0.004468115046620369, + "loss_iou": 0.5390625, + "loss_num": 0.036865234375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 134622728, + "step": 2033 + }, + { + "epoch": 0.19038704544390883, + "grad_norm": 76.74585723876953, + "learning_rate": 5e-05, + "loss": 1.5657, + "num_input_tokens_seen": 134688572, + "step": 2034 + }, + { + "epoch": 0.19038704544390883, + "loss": 1.6774089336395264, + "loss_ce": 0.0035807411186397076, + "loss_iou": 0.69140625, + "loss_num": 0.0576171875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 134688572, + "step": 2034 + }, + { + "epoch": 0.19048064772780457, + "grad_norm": 19.1229248046875, + "learning_rate": 5e-05, + "loss": 1.5923, + "num_input_tokens_seen": 134754856, + "step": 2035 + }, + { + "epoch": 0.19048064772780457, + "loss": 1.5887892246246338, + "loss_ce": 0.0052930498495697975, + "loss_iou": 0.6640625, + "loss_num": 0.052001953125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 134754856, + "step": 2035 + }, + { + "epoch": 0.19057425001170028, + "grad_norm": 17.247560501098633, + "learning_rate": 5e-05, + "loss": 1.4603, + "num_input_tokens_seen": 134821232, + "step": 2036 + }, + { + "epoch": 0.19057425001170028, + "loss": 1.5276718139648438, + "loss_ce": 0.0042343344539403915, + "loss_iou": 0.63671875, + "loss_num": 0.0498046875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 134821232, + "step": 2036 + }, + { + "epoch": 0.19066785229559602, + "grad_norm": 45.40583038330078, + "learning_rate": 5e-05, + "loss": 1.4312, + "num_input_tokens_seen": 134887156, + "step": 2037 + }, + { + "epoch": 0.19066785229559602, + "loss": 1.5920612812042236, + "loss_ce": 0.005147155839949846, + "loss_iou": 0.65234375, + "loss_num": 0.056884765625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 134887156, + "step": 2037 + }, + { + "epoch": 0.19076145457949173, + "grad_norm": 24.952116012573242, + "learning_rate": 5e-05, + "loss": 1.7742, + "num_input_tokens_seen": 134953656, + "step": 2038 + }, + { + "epoch": 0.19076145457949173, + "loss": 1.8267306089401245, + "loss_ce": 0.005441478453576565, + "loss_iou": 0.75390625, + "loss_num": 0.06298828125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 134953656, + "step": 2038 + }, + { + "epoch": 0.19085505686338747, + "grad_norm": 31.87831687927246, + "learning_rate": 5e-05, + "loss": 1.2528, + "num_input_tokens_seen": 135019064, + "step": 2039 + }, + { + "epoch": 0.19085505686338747, + "loss": 1.0912940502166748, + "loss_ce": 0.0029150533955544233, + "loss_iou": 0.47265625, + "loss_num": 0.0289306640625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 135019064, + "step": 2039 + }, + { + "epoch": 0.1909486591472832, + "grad_norm": 30.30205726623535, + "learning_rate": 5e-05, + "loss": 1.4011, + "num_input_tokens_seen": 135085700, + "step": 2040 + }, + { + "epoch": 0.1909486591472832, + "loss": 1.2939293384552002, + "loss_ce": 0.0029137025121599436, + "loss_iou": 0.57421875, + "loss_num": 0.0274658203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 135085700, + "step": 2040 + }, + { + "epoch": 0.19104226143117892, + "grad_norm": 18.505590438842773, + "learning_rate": 5e-05, + "loss": 1.5128, + "num_input_tokens_seen": 135151936, + "step": 2041 + }, + { + "epoch": 0.19104226143117892, + "loss": 1.4650330543518066, + "loss_ce": 0.0026306798681616783, + "loss_iou": 0.640625, + "loss_num": 0.037109375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 135151936, + "step": 2041 + }, + { + "epoch": 0.19113586371507466, + "grad_norm": 28.354467391967773, + "learning_rate": 5e-05, + "loss": 1.2614, + "num_input_tokens_seen": 135217880, + "step": 2042 + }, + { + "epoch": 0.19113586371507466, + "loss": 1.3484842777252197, + "loss_ce": 0.003269465174525976, + "loss_iou": 0.53125, + "loss_num": 0.05712890625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 135217880, + "step": 2042 + }, + { + "epoch": 0.19122946599897037, + "grad_norm": 29.115802764892578, + "learning_rate": 5e-05, + "loss": 1.3967, + "num_input_tokens_seen": 135284100, + "step": 2043 + }, + { + "epoch": 0.19122946599897037, + "loss": 1.264920949935913, + "loss_ce": 0.0066202254965901375, + "loss_iou": 0.5859375, + "loss_num": 0.016845703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 135284100, + "step": 2043 + }, + { + "epoch": 0.1913230682828661, + "grad_norm": 16.204648971557617, + "learning_rate": 5e-05, + "loss": 1.3888, + "num_input_tokens_seen": 135349864, + "step": 2044 + }, + { + "epoch": 0.1913230682828661, + "loss": 1.231990098953247, + "loss_ce": 0.010432527400553226, + "loss_iou": 0.5078125, + "loss_num": 0.04150390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 135349864, + "step": 2044 + }, + { + "epoch": 0.19141667056676182, + "grad_norm": 10.998099327087402, + "learning_rate": 5e-05, + "loss": 1.3206, + "num_input_tokens_seen": 135416512, + "step": 2045 + }, + { + "epoch": 0.19141667056676182, + "loss": 1.4172794818878174, + "loss_ce": 0.008099868893623352, + "loss_iou": 0.546875, + "loss_num": 0.0634765625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 135416512, + "step": 2045 + }, + { + "epoch": 0.19151027285065755, + "grad_norm": 22.290937423706055, + "learning_rate": 5e-05, + "loss": 1.3779, + "num_input_tokens_seen": 135481948, + "step": 2046 + }, + { + "epoch": 0.19151027285065755, + "loss": 1.562142252922058, + "loss_ce": 0.014046537689864635, + "loss_iou": 0.5703125, + "loss_num": 0.0810546875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 135481948, + "step": 2046 + }, + { + "epoch": 0.1916038751345533, + "grad_norm": 30.228641510009766, + "learning_rate": 5e-05, + "loss": 1.4567, + "num_input_tokens_seen": 135547780, + "step": 2047 + }, + { + "epoch": 0.1916038751345533, + "loss": 1.78278648853302, + "loss_ce": 0.00544277299195528, + "loss_iou": 0.71484375, + "loss_num": 0.06884765625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 135547780, + "step": 2047 + }, + { + "epoch": 0.191697477418449, + "grad_norm": 27.665626525878906, + "learning_rate": 5e-05, + "loss": 1.6013, + "num_input_tokens_seen": 135613412, + "step": 2048 + }, + { + "epoch": 0.191697477418449, + "loss": 1.3266444206237793, + "loss_ce": 0.005355388857424259, + "loss_iou": 0.56640625, + "loss_num": 0.037109375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 135613412, + "step": 2048 + }, + { + "epoch": 0.19179107970234474, + "grad_norm": 129.3882293701172, + "learning_rate": 5e-05, + "loss": 1.3811, + "num_input_tokens_seen": 135680668, + "step": 2049 + }, + { + "epoch": 0.19179107970234474, + "loss": 1.3647184371948242, + "loss_ce": 0.005343450233340263, + "loss_iou": 0.625, + "loss_num": 0.022705078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 135680668, + "step": 2049 + }, + { + "epoch": 0.19188468198624045, + "grad_norm": 23.881717681884766, + "learning_rate": 5e-05, + "loss": 1.5054, + "num_input_tokens_seen": 135747252, + "step": 2050 + }, + { + "epoch": 0.19188468198624045, + "loss": 1.521451711654663, + "loss_ce": 0.007779826410114765, + "loss_iou": 0.6484375, + "loss_num": 0.0439453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 135747252, + "step": 2050 + }, + { + "epoch": 0.1919782842701362, + "grad_norm": 21.621906280517578, + "learning_rate": 5e-05, + "loss": 1.2942, + "num_input_tokens_seen": 135813520, + "step": 2051 + }, + { + "epoch": 0.1919782842701362, + "loss": 1.4511830806732178, + "loss_ce": 0.003917410969734192, + "loss_iou": 0.609375, + "loss_num": 0.04638671875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 135813520, + "step": 2051 + }, + { + "epoch": 0.19207188655403193, + "grad_norm": 43.121639251708984, + "learning_rate": 5e-05, + "loss": 1.4368, + "num_input_tokens_seen": 135879308, + "step": 2052 + }, + { + "epoch": 0.19207188655403193, + "loss": 1.5804208517074585, + "loss_ce": 0.002295836340636015, + "loss_iou": 0.68359375, + "loss_num": 0.042236328125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 135879308, + "step": 2052 + }, + { + "epoch": 0.19216548883792764, + "grad_norm": 38.35646438598633, + "learning_rate": 5e-05, + "loss": 1.554, + "num_input_tokens_seen": 135945264, + "step": 2053 + }, + { + "epoch": 0.19216548883792764, + "loss": 1.4601187705993652, + "loss_ce": 0.010167606174945831, + "loss_iou": 0.60546875, + "loss_num": 0.0478515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 135945264, + "step": 2053 + }, + { + "epoch": 0.19225909112182338, + "grad_norm": 15.436667442321777, + "learning_rate": 5e-05, + "loss": 1.3526, + "num_input_tokens_seen": 136012284, + "step": 2054 + }, + { + "epoch": 0.19225909112182338, + "loss": 1.357641339302063, + "loss_ce": 0.004125708714127541, + "loss_iou": 0.5234375, + "loss_num": 0.06201171875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 136012284, + "step": 2054 + }, + { + "epoch": 0.1923526934057191, + "grad_norm": 35.86705017089844, + "learning_rate": 5e-05, + "loss": 1.2888, + "num_input_tokens_seen": 136078704, + "step": 2055 + }, + { + "epoch": 0.1923526934057191, + "loss": 1.2883079051971436, + "loss_ce": 0.005776176694780588, + "loss_iou": 0.53515625, + "loss_num": 0.0419921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 136078704, + "step": 2055 + }, + { + "epoch": 0.19244629568961483, + "grad_norm": 20.787574768066406, + "learning_rate": 5e-05, + "loss": 1.5442, + "num_input_tokens_seen": 136145564, + "step": 2056 + }, + { + "epoch": 0.19244629568961483, + "loss": 1.7299318313598633, + "loss_ce": 0.0043459320440888405, + "loss_iou": 0.71875, + "loss_num": 0.058349609375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 136145564, + "step": 2056 + }, + { + "epoch": 0.19253989797351057, + "grad_norm": 16.39914321899414, + "learning_rate": 5e-05, + "loss": 1.2208, + "num_input_tokens_seen": 136212092, + "step": 2057 + }, + { + "epoch": 0.19253989797351057, + "loss": 1.276814579963684, + "loss_ce": 0.003377099521458149, + "loss_iou": 0.5703125, + "loss_num": 0.0257568359375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 136212092, + "step": 2057 + }, + { + "epoch": 0.19263350025740628, + "grad_norm": 22.33026123046875, + "learning_rate": 5e-05, + "loss": 1.5323, + "num_input_tokens_seen": 136278912, + "step": 2058 + }, + { + "epoch": 0.19263350025740628, + "loss": 1.7549822330474854, + "loss_ce": 0.005958713591098785, + "loss_iou": 0.75390625, + "loss_num": 0.048095703125, + "loss_xval": 1.75, + "num_input_tokens_seen": 136278912, + "step": 2058 + }, + { + "epoch": 0.19272710254130201, + "grad_norm": 17.806638717651367, + "learning_rate": 5e-05, + "loss": 1.5435, + "num_input_tokens_seen": 136345416, + "step": 2059 + }, + { + "epoch": 0.19272710254130201, + "loss": 1.5639630556106567, + "loss_ce": 0.006345891393721104, + "loss_iou": 0.671875, + "loss_num": 0.043212890625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 136345416, + "step": 2059 + }, + { + "epoch": 0.19282070482519773, + "grad_norm": 17.42680549621582, + "learning_rate": 5e-05, + "loss": 1.3353, + "num_input_tokens_seen": 136411592, + "step": 2060 + }, + { + "epoch": 0.19282070482519773, + "loss": 1.1476075649261475, + "loss_ce": 0.007470767013728619, + "loss_iou": 0.5, + "loss_num": 0.0274658203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 136411592, + "step": 2060 + }, + { + "epoch": 0.19291430710909346, + "grad_norm": 18.45506477355957, + "learning_rate": 5e-05, + "loss": 1.3657, + "num_input_tokens_seen": 136478500, + "step": 2061 + }, + { + "epoch": 0.19291430710909346, + "loss": 1.2191590070724487, + "loss_ce": 0.004803495481610298, + "loss_iou": 0.5390625, + "loss_num": 0.027587890625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 136478500, + "step": 2061 + }, + { + "epoch": 0.1930079093929892, + "grad_norm": 23.658056259155273, + "learning_rate": 5e-05, + "loss": 1.5858, + "num_input_tokens_seen": 136543556, + "step": 2062 + }, + { + "epoch": 0.1930079093929892, + "loss": 1.5524146556854248, + "loss_ce": 0.007004569284617901, + "loss_iou": 0.640625, + "loss_num": 0.052734375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 136543556, + "step": 2062 + }, + { + "epoch": 0.1931015116768849, + "grad_norm": 69.70921325683594, + "learning_rate": 5e-05, + "loss": 1.6602, + "num_input_tokens_seen": 136609576, + "step": 2063 + }, + { + "epoch": 0.1931015116768849, + "loss": 1.5370267629623413, + "loss_ce": 0.005044293124228716, + "loss_iou": 0.625, + "loss_num": 0.056396484375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 136609576, + "step": 2063 + }, + { + "epoch": 0.19319511396078065, + "grad_norm": 21.903438568115234, + "learning_rate": 5e-05, + "loss": 1.561, + "num_input_tokens_seen": 136676252, + "step": 2064 + }, + { + "epoch": 0.19319511396078065, + "loss": 1.5233875513076782, + "loss_ce": 0.004832881968468428, + "loss_iou": 0.63671875, + "loss_num": 0.049072265625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 136676252, + "step": 2064 + }, + { + "epoch": 0.19328871624467636, + "grad_norm": 11.89264965057373, + "learning_rate": 5e-05, + "loss": 1.5001, + "num_input_tokens_seen": 136742748, + "step": 2065 + }, + { + "epoch": 0.19328871624467636, + "loss": 1.5451369285583496, + "loss_ce": 0.0021682712249457836, + "loss_iou": 0.6640625, + "loss_num": 0.04296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 136742748, + "step": 2065 + }, + { + "epoch": 0.1933823185285721, + "grad_norm": 30.70446014404297, + "learning_rate": 5e-05, + "loss": 1.4357, + "num_input_tokens_seen": 136809404, + "step": 2066 + }, + { + "epoch": 0.1933823185285721, + "loss": 1.6584296226501465, + "loss_ce": 0.0046210456639528275, + "loss_iou": 0.66796875, + "loss_num": 0.0634765625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 136809404, + "step": 2066 + }, + { + "epoch": 0.1934759208124678, + "grad_norm": 32.82989501953125, + "learning_rate": 5e-05, + "loss": 1.6026, + "num_input_tokens_seen": 136875364, + "step": 2067 + }, + { + "epoch": 0.1934759208124678, + "loss": 1.6275928020477295, + "loss_ce": 0.004057623445987701, + "loss_iou": 0.66015625, + "loss_num": 0.06103515625, + "loss_xval": 1.625, + "num_input_tokens_seen": 136875364, + "step": 2067 + }, + { + "epoch": 0.19356952309636355, + "grad_norm": 18.987201690673828, + "learning_rate": 5e-05, + "loss": 1.5606, + "num_input_tokens_seen": 136942216, + "step": 2068 + }, + { + "epoch": 0.19356952309636355, + "loss": 1.5228208303451538, + "loss_ce": 0.0038388194516301155, + "loss_iou": 0.6328125, + "loss_num": 0.050048828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 136942216, + "step": 2068 + }, + { + "epoch": 0.1936631253802593, + "grad_norm": 10.04777717590332, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 137008152, + "step": 2069 + }, + { + "epoch": 0.1936631253802593, + "loss": 1.1534812450408936, + "loss_ce": 0.004799516871571541, + "loss_iou": 0.49609375, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 137008152, + "step": 2069 + }, + { + "epoch": 0.193756727664155, + "grad_norm": 15.951964378356934, + "learning_rate": 5e-05, + "loss": 1.1364, + "num_input_tokens_seen": 137075148, + "step": 2070 + }, + { + "epoch": 0.193756727664155, + "loss": 1.2243413925170898, + "loss_ce": 0.006354369223117828, + "loss_iou": 0.49609375, + "loss_num": 0.044677734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 137075148, + "step": 2070 + }, + { + "epoch": 0.19385032994805074, + "grad_norm": 77.72500610351562, + "learning_rate": 5e-05, + "loss": 1.4225, + "num_input_tokens_seen": 137141752, + "step": 2071 + }, + { + "epoch": 0.19385032994805074, + "loss": 1.2814772129058838, + "loss_ce": 0.003156971652060747, + "loss_iou": 0.57421875, + "loss_num": 0.026611328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 137141752, + "step": 2071 + }, + { + "epoch": 0.19394393223194645, + "grad_norm": 41.3846435546875, + "learning_rate": 5e-05, + "loss": 1.5205, + "num_input_tokens_seen": 137208480, + "step": 2072 + }, + { + "epoch": 0.19394393223194645, + "loss": 1.5058314800262451, + "loss_ce": 0.007784511893987656, + "loss_iou": 0.640625, + "loss_num": 0.04296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 137208480, + "step": 2072 + }, + { + "epoch": 0.19403753451584219, + "grad_norm": 22.192081451416016, + "learning_rate": 5e-05, + "loss": 1.6468, + "num_input_tokens_seen": 137274816, + "step": 2073 + }, + { + "epoch": 0.19403753451584219, + "loss": 1.4129598140716553, + "loss_ce": 0.007442166563123465, + "loss_iou": 0.6015625, + "loss_num": 0.0400390625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 137274816, + "step": 2073 + }, + { + "epoch": 0.19413113679973792, + "grad_norm": 19.99022674560547, + "learning_rate": 5e-05, + "loss": 1.3093, + "num_input_tokens_seen": 137341384, + "step": 2074 + }, + { + "epoch": 0.19413113679973792, + "loss": 1.2690801620483398, + "loss_ce": 0.006384734064340591, + "loss_iou": 0.53515625, + "loss_num": 0.038818359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 137341384, + "step": 2074 + }, + { + "epoch": 0.19422473908363364, + "grad_norm": 35.265750885009766, + "learning_rate": 5e-05, + "loss": 1.4349, + "num_input_tokens_seen": 137408812, + "step": 2075 + }, + { + "epoch": 0.19422473908363364, + "loss": 1.3765074014663696, + "loss_ce": 0.004437153693288565, + "loss_iou": 0.59765625, + "loss_num": 0.03515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 137408812, + "step": 2075 + }, + { + "epoch": 0.19431834136752937, + "grad_norm": 22.01068115234375, + "learning_rate": 5e-05, + "loss": 1.7257, + "num_input_tokens_seen": 137475192, + "step": 2076 + }, + { + "epoch": 0.19431834136752937, + "loss": 1.6239678859710693, + "loss_ce": 0.0038506705313920975, + "loss_iou": 0.671875, + "loss_num": 0.0546875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 137475192, + "step": 2076 + }, + { + "epoch": 0.19441194365142508, + "grad_norm": 25.21246910095215, + "learning_rate": 5e-05, + "loss": 1.3686, + "num_input_tokens_seen": 137542068, + "step": 2077 + }, + { + "epoch": 0.19441194365142508, + "loss": 1.4951391220092773, + "loss_ce": 0.005392924882471561, + "loss_iou": 0.63671875, + "loss_num": 0.043212890625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 137542068, + "step": 2077 + }, + { + "epoch": 0.19450554593532082, + "grad_norm": 26.272916793823242, + "learning_rate": 5e-05, + "loss": 1.2421, + "num_input_tokens_seen": 137608836, + "step": 2078 + }, + { + "epoch": 0.19450554593532082, + "loss": 1.3518580198287964, + "loss_ce": 0.005178361665457487, + "loss_iou": 0.59765625, + "loss_num": 0.030517578125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 137608836, + "step": 2078 + }, + { + "epoch": 0.19459914821921656, + "grad_norm": 19.860158920288086, + "learning_rate": 5e-05, + "loss": 1.2909, + "num_input_tokens_seen": 137674152, + "step": 2079 + }, + { + "epoch": 0.19459914821921656, + "loss": 1.1912102699279785, + "loss_ce": 0.006884138099849224, + "loss_iou": 0.515625, + "loss_num": 0.030517578125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 137674152, + "step": 2079 + }, + { + "epoch": 0.19469275050311227, + "grad_norm": 19.28133773803711, + "learning_rate": 5e-05, + "loss": 1.3627, + "num_input_tokens_seen": 137740688, + "step": 2080 + }, + { + "epoch": 0.19469275050311227, + "loss": 1.4136195182800293, + "loss_ce": 0.011275755241513252, + "loss_iou": 0.59375, + "loss_num": 0.042724609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 137740688, + "step": 2080 + }, + { + "epoch": 0.194786352787008, + "grad_norm": 26.150165557861328, + "learning_rate": 5e-05, + "loss": 1.559, + "num_input_tokens_seen": 137806812, + "step": 2081 + }, + { + "epoch": 0.194786352787008, + "loss": 1.5323134660720825, + "loss_ce": 0.0020399647764861584, + "loss_iou": 0.63671875, + "loss_num": 0.051025390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 137806812, + "step": 2081 + }, + { + "epoch": 0.19487995507090372, + "grad_norm": 66.3988265991211, + "learning_rate": 5e-05, + "loss": 1.6166, + "num_input_tokens_seen": 137872808, + "step": 2082 + }, + { + "epoch": 0.19487995507090372, + "loss": 1.366492509841919, + "loss_ce": 0.006629281677305698, + "loss_iou": 0.578125, + "loss_num": 0.040283203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 137872808, + "step": 2082 + }, + { + "epoch": 0.19497355735479946, + "grad_norm": 23.48615074157715, + "learning_rate": 5e-05, + "loss": 1.4515, + "num_input_tokens_seen": 137939268, + "step": 2083 + }, + { + "epoch": 0.19497355735479946, + "loss": 1.219407320022583, + "loss_ce": 0.004075351171195507, + "loss_iou": 0.5234375, + "loss_num": 0.032958984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 137939268, + "step": 2083 + }, + { + "epoch": 0.1950671596386952, + "grad_norm": 26.136940002441406, + "learning_rate": 5e-05, + "loss": 1.2827, + "num_input_tokens_seen": 138004908, + "step": 2084 + }, + { + "epoch": 0.1950671596386952, + "loss": 1.1523009538650513, + "loss_ce": 0.007281413301825523, + "loss_iou": 0.49609375, + "loss_num": 0.0301513671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 138004908, + "step": 2084 + }, + { + "epoch": 0.1951607619225909, + "grad_norm": 24.42344093322754, + "learning_rate": 5e-05, + "loss": 1.6681, + "num_input_tokens_seen": 138072064, + "step": 2085 + }, + { + "epoch": 0.1951607619225909, + "loss": 1.651878833770752, + "loss_ce": 0.004906099289655685, + "loss_iou": 0.69921875, + "loss_num": 0.049560546875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 138072064, + "step": 2085 + }, + { + "epoch": 0.19525436420648665, + "grad_norm": 26.99013328552246, + "learning_rate": 5e-05, + "loss": 1.3346, + "num_input_tokens_seen": 138137952, + "step": 2086 + }, + { + "epoch": 0.19525436420648665, + "loss": 1.3745825290679932, + "loss_ce": 0.0025121495127677917, + "loss_iou": 0.5390625, + "loss_num": 0.05810546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 138137952, + "step": 2086 + }, + { + "epoch": 0.19534796649038236, + "grad_norm": 22.294784545898438, + "learning_rate": 5e-05, + "loss": 1.5853, + "num_input_tokens_seen": 138202528, + "step": 2087 + }, + { + "epoch": 0.19534796649038236, + "loss": 1.4166902303695679, + "loss_ce": 0.006534058600664139, + "loss_iou": 0.62109375, + "loss_num": 0.033447265625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 138202528, + "step": 2087 + }, + { + "epoch": 0.1954415687742781, + "grad_norm": 13.048158645629883, + "learning_rate": 5e-05, + "loss": 1.3337, + "num_input_tokens_seen": 138269120, + "step": 2088 + }, + { + "epoch": 0.1954415687742781, + "loss": 1.5177942514419556, + "loss_ce": 0.005098981782793999, + "loss_iou": 0.625, + "loss_num": 0.05224609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 138269120, + "step": 2088 + }, + { + "epoch": 0.1955351710581738, + "grad_norm": 20.494586944580078, + "learning_rate": 5e-05, + "loss": 1.5094, + "num_input_tokens_seen": 138335564, + "step": 2089 + }, + { + "epoch": 0.1955351710581738, + "loss": 1.535733699798584, + "loss_ce": 0.006436879746615887, + "loss_iou": 0.6796875, + "loss_num": 0.03466796875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 138335564, + "step": 2089 + }, + { + "epoch": 0.19562877334206955, + "grad_norm": 17.31364631652832, + "learning_rate": 5e-05, + "loss": 1.178, + "num_input_tokens_seen": 138402628, + "step": 2090 + }, + { + "epoch": 0.19562877334206955, + "loss": 1.0419156551361084, + "loss_ce": 0.008224312216043472, + "loss_iou": 0.44140625, + "loss_num": 0.0299072265625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 138402628, + "step": 2090 + }, + { + "epoch": 0.19572237562596528, + "grad_norm": 12.1629056930542, + "learning_rate": 5e-05, + "loss": 1.513, + "num_input_tokens_seen": 138468852, + "step": 2091 + }, + { + "epoch": 0.19572237562596528, + "loss": 1.4543973207473755, + "loss_ce": 0.004201980773359537, + "loss_iou": 0.61328125, + "loss_num": 0.044921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 138468852, + "step": 2091 + }, + { + "epoch": 0.195815977909861, + "grad_norm": 64.06700134277344, + "learning_rate": 5e-05, + "loss": 1.358, + "num_input_tokens_seen": 138534760, + "step": 2092 + }, + { + "epoch": 0.195815977909861, + "loss": 1.3369941711425781, + "loss_ce": 0.006427827291190624, + "loss_iou": 0.51171875, + "loss_num": 0.06201171875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 138534760, + "step": 2092 + }, + { + "epoch": 0.19590958019375673, + "grad_norm": 29.642932891845703, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 138601456, + "step": 2093 + }, + { + "epoch": 0.19590958019375673, + "loss": 1.1915972232818604, + "loss_ce": 0.0080033540725708, + "loss_iou": 0.546875, + "loss_num": 0.0186767578125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 138601456, + "step": 2093 + }, + { + "epoch": 0.19600318247765244, + "grad_norm": 173.1945037841797, + "learning_rate": 5e-05, + "loss": 1.3226, + "num_input_tokens_seen": 138668296, + "step": 2094 + }, + { + "epoch": 0.19600318247765244, + "loss": 1.2930828332901, + "loss_ce": 0.005057895090430975, + "loss_iou": 0.5546875, + "loss_num": 0.035400390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 138668296, + "step": 2094 + }, + { + "epoch": 0.19609678476154818, + "grad_norm": 23.310731887817383, + "learning_rate": 5e-05, + "loss": 1.2903, + "num_input_tokens_seen": 138734044, + "step": 2095 + }, + { + "epoch": 0.19609678476154818, + "loss": 1.418961763381958, + "loss_ce": 0.001969480188563466, + "loss_iou": 0.59375, + "loss_num": 0.045654296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 138734044, + "step": 2095 + }, + { + "epoch": 0.19619038704544392, + "grad_norm": 32.93905258178711, + "learning_rate": 5e-05, + "loss": 1.122, + "num_input_tokens_seen": 138801836, + "step": 2096 + }, + { + "epoch": 0.19619038704544392, + "loss": 1.181718349456787, + "loss_ce": 0.0059369634836912155, + "loss_iou": 0.5, + "loss_num": 0.03466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 138801836, + "step": 2096 + }, + { + "epoch": 0.19628398932933963, + "grad_norm": 25.704120635986328, + "learning_rate": 5e-05, + "loss": 1.3838, + "num_input_tokens_seen": 138868040, + "step": 2097 + }, + { + "epoch": 0.19628398932933963, + "loss": 1.3380424976348877, + "loss_ce": 0.010405833832919598, + "loss_iou": 0.58984375, + "loss_num": 0.029541015625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 138868040, + "step": 2097 + }, + { + "epoch": 0.19637759161323537, + "grad_norm": 54.63113021850586, + "learning_rate": 5e-05, + "loss": 1.395, + "num_input_tokens_seen": 138933456, + "step": 2098 + }, + { + "epoch": 0.19637759161323537, + "loss": 1.4318453073501587, + "loss_ce": 0.006552326492965221, + "loss_iou": 0.6171875, + "loss_num": 0.0380859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 138933456, + "step": 2098 + }, + { + "epoch": 0.19647119389713108, + "grad_norm": 18.833669662475586, + "learning_rate": 5e-05, + "loss": 1.6439, + "num_input_tokens_seen": 138999968, + "step": 2099 + }, + { + "epoch": 0.19647119389713108, + "loss": 1.536208987236023, + "loss_ce": 0.00398245407268405, + "loss_iou": 0.65234375, + "loss_num": 0.044677734375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 138999968, + "step": 2099 + }, + { + "epoch": 0.19656479618102682, + "grad_norm": 21.15911865234375, + "learning_rate": 5e-05, + "loss": 1.4839, + "num_input_tokens_seen": 139066696, + "step": 2100 + }, + { + "epoch": 0.19656479618102682, + "loss": 1.5681376457214355, + "loss_ce": 0.004661109764128923, + "loss_iou": 0.59375, + "loss_num": 0.07470703125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 139066696, + "step": 2100 + }, + { + "epoch": 0.19665839846492256, + "grad_norm": 27.802852630615234, + "learning_rate": 5e-05, + "loss": 1.5475, + "num_input_tokens_seen": 139133616, + "step": 2101 + }, + { + "epoch": 0.19665839846492256, + "loss": 1.6107966899871826, + "loss_ce": 0.01802327111363411, + "loss_iou": 0.6484375, + "loss_num": 0.059814453125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 139133616, + "step": 2101 + }, + { + "epoch": 0.19675200074881827, + "grad_norm": 24.559831619262695, + "learning_rate": 5e-05, + "loss": 1.2611, + "num_input_tokens_seen": 139199640, + "step": 2102 + }, + { + "epoch": 0.19675200074881827, + "loss": 1.2036356925964355, + "loss_ce": 0.005027277860790491, + "loss_iou": 0.490234375, + "loss_num": 0.04345703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 139199640, + "step": 2102 + }, + { + "epoch": 0.196845603032714, + "grad_norm": 25.998132705688477, + "learning_rate": 5e-05, + "loss": 1.5986, + "num_input_tokens_seen": 139265920, + "step": 2103 + }, + { + "epoch": 0.196845603032714, + "loss": 1.5837655067443848, + "loss_ce": 0.007593577262014151, + "loss_iou": 0.609375, + "loss_num": 0.0712890625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 139265920, + "step": 2103 + }, + { + "epoch": 0.19693920531660972, + "grad_norm": 26.764257431030273, + "learning_rate": 5e-05, + "loss": 1.3174, + "num_input_tokens_seen": 139332048, + "step": 2104 + }, + { + "epoch": 0.19693920531660972, + "loss": 1.2746940851211548, + "loss_ce": 0.0066276658326387405, + "loss_iou": 0.52734375, + "loss_num": 0.04248046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 139332048, + "step": 2104 + }, + { + "epoch": 0.19703280760050546, + "grad_norm": 13.437508583068848, + "learning_rate": 5e-05, + "loss": 1.2624, + "num_input_tokens_seen": 139398548, + "step": 2105 + }, + { + "epoch": 0.19703280760050546, + "loss": 1.1900551319122314, + "loss_ce": 0.00304337777197361, + "loss_iou": 0.515625, + "loss_num": 0.031982421875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 139398548, + "step": 2105 + }, + { + "epoch": 0.19712640988440117, + "grad_norm": 16.992902755737305, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 139464796, + "step": 2106 + }, + { + "epoch": 0.19712640988440117, + "loss": 1.3787537813186646, + "loss_ce": 0.005218650214374065, + "loss_iou": 0.57421875, + "loss_num": 0.045654296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 139464796, + "step": 2106 + }, + { + "epoch": 0.1972200121682969, + "grad_norm": 67.82573699951172, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 139531248, + "step": 2107 + }, + { + "epoch": 0.1972200121682969, + "loss": 1.1374847888946533, + "loss_ce": 0.005770938470959663, + "loss_iou": 0.458984375, + "loss_num": 0.04248046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 139531248, + "step": 2107 + }, + { + "epoch": 0.19731361445219264, + "grad_norm": 26.257421493530273, + "learning_rate": 5e-05, + "loss": 1.7659, + "num_input_tokens_seen": 139596544, + "step": 2108 + }, + { + "epoch": 0.19731361445219264, + "loss": 1.7995635271072388, + "loss_ce": 0.007571268826723099, + "loss_iou": 0.7421875, + "loss_num": 0.061767578125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 139596544, + "step": 2108 + }, + { + "epoch": 0.19740721673608835, + "grad_norm": 30.2551212310791, + "learning_rate": 5e-05, + "loss": 1.193, + "num_input_tokens_seen": 139663640, + "step": 2109 + }, + { + "epoch": 0.19740721673608835, + "loss": 1.1639381647109985, + "loss_ce": 0.0013405437348410487, + "loss_iou": 0.515625, + "loss_num": 0.0257568359375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 139663640, + "step": 2109 + }, + { + "epoch": 0.1975008190199841, + "grad_norm": 11.374149322509766, + "learning_rate": 5e-05, + "loss": 1.2721, + "num_input_tokens_seen": 139730368, + "step": 2110 + }, + { + "epoch": 0.1975008190199841, + "loss": 1.266725778579712, + "loss_ce": 0.007448405493050814, + "loss_iou": 0.546875, + "loss_num": 0.032470703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 139730368, + "step": 2110 + }, + { + "epoch": 0.1975944213038798, + "grad_norm": 109.66407775878906, + "learning_rate": 5e-05, + "loss": 1.2802, + "num_input_tokens_seen": 139796708, + "step": 2111 + }, + { + "epoch": 0.1975944213038798, + "loss": 1.364131212234497, + "loss_ce": 0.0032914401963353157, + "loss_iou": 0.5703125, + "loss_num": 0.044677734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 139796708, + "step": 2111 + }, + { + "epoch": 0.19768802358777554, + "grad_norm": 28.161298751831055, + "learning_rate": 5e-05, + "loss": 1.4388, + "num_input_tokens_seen": 139863900, + "step": 2112 + }, + { + "epoch": 0.19768802358777554, + "loss": 1.3507535457611084, + "loss_ce": 0.003097320906817913, + "loss_iou": 0.578125, + "loss_num": 0.038330078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 139863900, + "step": 2112 + }, + { + "epoch": 0.19778162587167128, + "grad_norm": 22.915809631347656, + "learning_rate": 5e-05, + "loss": 1.4894, + "num_input_tokens_seen": 139931096, + "step": 2113 + }, + { + "epoch": 0.19778162587167128, + "loss": 1.3303176164627075, + "loss_ce": 0.0021926192566752434, + "loss_iou": 0.58984375, + "loss_num": 0.02880859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 139931096, + "step": 2113 + }, + { + "epoch": 0.197875228155567, + "grad_norm": 25.763959884643555, + "learning_rate": 5e-05, + "loss": 1.3341, + "num_input_tokens_seen": 139998124, + "step": 2114 + }, + { + "epoch": 0.197875228155567, + "loss": 1.458052158355713, + "loss_ce": 0.002485835924744606, + "loss_iou": 0.57421875, + "loss_num": 0.062255859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 139998124, + "step": 2114 + }, + { + "epoch": 0.19796883043946273, + "grad_norm": 24.01043128967285, + "learning_rate": 5e-05, + "loss": 1.4405, + "num_input_tokens_seen": 140065164, + "step": 2115 + }, + { + "epoch": 0.19796883043946273, + "loss": 1.3948493003845215, + "loss_ce": 0.006177469156682491, + "loss_iou": 0.6015625, + "loss_num": 0.03662109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 140065164, + "step": 2115 + }, + { + "epoch": 0.19806243272335844, + "grad_norm": 24.73077392578125, + "learning_rate": 5e-05, + "loss": 1.1244, + "num_input_tokens_seen": 140129844, + "step": 2116 + }, + { + "epoch": 0.19806243272335844, + "loss": 1.1548796892166138, + "loss_ce": 0.005953886546194553, + "loss_iou": 0.474609375, + "loss_num": 0.0400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 140129844, + "step": 2116 + }, + { + "epoch": 0.19815603500725418, + "grad_norm": 25.4124813079834, + "learning_rate": 5e-05, + "loss": 1.4685, + "num_input_tokens_seen": 140195892, + "step": 2117 + }, + { + "epoch": 0.19815603500725418, + "loss": 1.6075721979141235, + "loss_ce": 0.006986259017139673, + "loss_iou": 0.65625, + "loss_num": 0.05712890625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 140195892, + "step": 2117 + }, + { + "epoch": 0.19824963729114992, + "grad_norm": 33.2074089050293, + "learning_rate": 5e-05, + "loss": 1.5424, + "num_input_tokens_seen": 140261484, + "step": 2118 + }, + { + "epoch": 0.19824963729114992, + "loss": 1.6192563772201538, + "loss_ce": 0.005486827343702316, + "loss_iou": 0.67578125, + "loss_num": 0.052001953125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 140261484, + "step": 2118 + }, + { + "epoch": 0.19834323957504563, + "grad_norm": 19.876880645751953, + "learning_rate": 5e-05, + "loss": 1.6574, + "num_input_tokens_seen": 140327892, + "step": 2119 + }, + { + "epoch": 0.19834323957504563, + "loss": 1.7455635070800781, + "loss_ce": 0.005329243838787079, + "loss_iou": 0.7265625, + "loss_num": 0.0576171875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 140327892, + "step": 2119 + }, + { + "epoch": 0.19843684185894137, + "grad_norm": 33.19362258911133, + "learning_rate": 5e-05, + "loss": 1.3776, + "num_input_tokens_seen": 140394832, + "step": 2120 + }, + { + "epoch": 0.19843684185894137, + "loss": 1.3313980102539062, + "loss_ce": 0.005714379251003265, + "loss_iou": 0.55078125, + "loss_num": 0.044677734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 140394832, + "step": 2120 + }, + { + "epoch": 0.19853044414283708, + "grad_norm": 20.490461349487305, + "learning_rate": 5e-05, + "loss": 1.3399, + "num_input_tokens_seen": 140461516, + "step": 2121 + }, + { + "epoch": 0.19853044414283708, + "loss": 1.456218957901001, + "loss_ce": 0.005047088488936424, + "loss_iou": 0.63671875, + "loss_num": 0.03515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 140461516, + "step": 2121 + }, + { + "epoch": 0.19862404642673281, + "grad_norm": 36.80075454711914, + "learning_rate": 5e-05, + "loss": 1.5404, + "num_input_tokens_seen": 140527928, + "step": 2122 + }, + { + "epoch": 0.19862404642673281, + "loss": 1.4016985893249512, + "loss_ce": 0.00374927488155663, + "loss_iou": 0.59765625, + "loss_num": 0.041015625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 140527928, + "step": 2122 + }, + { + "epoch": 0.19871764871062855, + "grad_norm": 21.78231430053711, + "learning_rate": 5e-05, + "loss": 1.594, + "num_input_tokens_seen": 140593928, + "step": 2123 + }, + { + "epoch": 0.19871764871062855, + "loss": 1.6748840808868408, + "loss_ce": 0.004962306469678879, + "loss_iou": 0.6953125, + "loss_num": 0.054931640625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 140593928, + "step": 2123 + }, + { + "epoch": 0.19881125099452426, + "grad_norm": 23.20709991455078, + "learning_rate": 5e-05, + "loss": 1.3419, + "num_input_tokens_seen": 140660548, + "step": 2124 + }, + { + "epoch": 0.19881125099452426, + "loss": 1.2174222469329834, + "loss_ce": 0.00453170295804739, + "loss_iou": 0.53125, + "loss_num": 0.030029296875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 140660548, + "step": 2124 + }, + { + "epoch": 0.19890485327842, + "grad_norm": 24.04755210876465, + "learning_rate": 5e-05, + "loss": 1.5632, + "num_input_tokens_seen": 140726312, + "step": 2125 + }, + { + "epoch": 0.19890485327842, + "loss": 1.418169379234314, + "loss_ce": 0.0038627460598945618, + "loss_iou": 0.57421875, + "loss_num": 0.053466796875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 140726312, + "step": 2125 + }, + { + "epoch": 0.1989984555623157, + "grad_norm": 12.664782524108887, + "learning_rate": 5e-05, + "loss": 1.2301, + "num_input_tokens_seen": 140792020, + "step": 2126 + }, + { + "epoch": 0.1989984555623157, + "loss": 1.1144989728927612, + "loss_ce": 0.008541987277567387, + "loss_iou": 0.443359375, + "loss_num": 0.043701171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 140792020, + "step": 2126 + }, + { + "epoch": 0.19909205784621145, + "grad_norm": 14.952765464782715, + "learning_rate": 5e-05, + "loss": 1.4113, + "num_input_tokens_seen": 140858640, + "step": 2127 + }, + { + "epoch": 0.19909205784621145, + "loss": 1.442549228668213, + "loss_ce": 0.007002345286309719, + "loss_iou": 0.60546875, + "loss_num": 0.045166015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 140858640, + "step": 2127 + }, + { + "epoch": 0.19918566013010716, + "grad_norm": 34.664398193359375, + "learning_rate": 5e-05, + "loss": 1.3865, + "num_input_tokens_seen": 140923380, + "step": 2128 + }, + { + "epoch": 0.19918566013010716, + "loss": 1.5297850370407104, + "loss_ce": 0.005859227851033211, + "loss_iou": 0.609375, + "loss_num": 0.061279296875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 140923380, + "step": 2128 + }, + { + "epoch": 0.1992792624140029, + "grad_norm": 23.958240509033203, + "learning_rate": 5e-05, + "loss": 1.5756, + "num_input_tokens_seen": 140989880, + "step": 2129 + }, + { + "epoch": 0.1992792624140029, + "loss": 1.7080774307250977, + "loss_ce": 0.005928914062678814, + "loss_iou": 0.7734375, + "loss_num": 0.0303955078125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 140989880, + "step": 2129 + }, + { + "epoch": 0.19937286469789864, + "grad_norm": 34.81669235229492, + "learning_rate": 5e-05, + "loss": 1.4449, + "num_input_tokens_seen": 141057188, + "step": 2130 + }, + { + "epoch": 0.19937286469789864, + "loss": 1.278939962387085, + "loss_ce": 0.0064790514297783375, + "loss_iou": 0.51953125, + "loss_num": 0.046630859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 141057188, + "step": 2130 + }, + { + "epoch": 0.19946646698179435, + "grad_norm": 49.78134536743164, + "learning_rate": 5e-05, + "loss": 1.4739, + "num_input_tokens_seen": 141123452, + "step": 2131 + }, + { + "epoch": 0.19946646698179435, + "loss": 1.5672564506530762, + "loss_ce": 0.009639300405979156, + "loss_iou": 0.6875, + "loss_num": 0.035888671875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 141123452, + "step": 2131 + }, + { + "epoch": 0.1995600692656901, + "grad_norm": 15.674970626831055, + "learning_rate": 5e-05, + "loss": 1.5758, + "num_input_tokens_seen": 141189372, + "step": 2132 + }, + { + "epoch": 0.1995600692656901, + "loss": 1.4908785820007324, + "loss_ce": 0.005649122409522533, + "loss_iou": 0.63671875, + "loss_num": 0.042724609375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 141189372, + "step": 2132 + }, + { + "epoch": 0.1996536715495858, + "grad_norm": 23.224313735961914, + "learning_rate": 5e-05, + "loss": 1.455, + "num_input_tokens_seen": 141256076, + "step": 2133 + }, + { + "epoch": 0.1996536715495858, + "loss": 1.4713666439056396, + "loss_ce": 0.003593207336962223, + "loss_iou": 0.640625, + "loss_num": 0.03759765625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 141256076, + "step": 2133 + }, + { + "epoch": 0.19974727383348154, + "grad_norm": 62.98255920410156, + "learning_rate": 5e-05, + "loss": 1.3374, + "num_input_tokens_seen": 141323612, + "step": 2134 + }, + { + "epoch": 0.19974727383348154, + "loss": 1.3081867694854736, + "loss_ce": 0.004475940950214863, + "loss_iou": 0.578125, + "loss_num": 0.02978515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 141323612, + "step": 2134 + }, + { + "epoch": 0.19984087611737728, + "grad_norm": 21.07288932800293, + "learning_rate": 5e-05, + "loss": 1.3495, + "num_input_tokens_seen": 141390536, + "step": 2135 + }, + { + "epoch": 0.19984087611737728, + "loss": 1.368180513381958, + "loss_ce": 0.007828842848539352, + "loss_iou": 0.609375, + "loss_num": 0.0286865234375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 141390536, + "step": 2135 + }, + { + "epoch": 0.19993447840127299, + "grad_norm": 35.24394607543945, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 141456632, + "step": 2136 + }, + { + "epoch": 0.19993447840127299, + "loss": 1.4562913179397583, + "loss_ce": 0.00609607994556427, + "loss_iou": 0.609375, + "loss_num": 0.04638671875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 141456632, + "step": 2136 + }, + { + "epoch": 0.20002808068516872, + "grad_norm": 55.776893615722656, + "learning_rate": 5e-05, + "loss": 1.5803, + "num_input_tokens_seen": 141522892, + "step": 2137 + }, + { + "epoch": 0.20002808068516872, + "loss": 1.470313310623169, + "loss_ce": 0.007910918444395065, + "loss_iou": 0.60546875, + "loss_num": 0.05029296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 141522892, + "step": 2137 + }, + { + "epoch": 0.20012168296906443, + "grad_norm": 43.92560577392578, + "learning_rate": 5e-05, + "loss": 1.3085, + "num_input_tokens_seen": 141588712, + "step": 2138 + }, + { + "epoch": 0.20012168296906443, + "loss": 1.2516255378723145, + "loss_ce": 0.003090436104685068, + "loss_iou": 0.5234375, + "loss_num": 0.040771484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 141588712, + "step": 2138 + }, + { + "epoch": 0.20021528525296017, + "grad_norm": 225.448974609375, + "learning_rate": 5e-05, + "loss": 1.1565, + "num_input_tokens_seen": 141655232, + "step": 2139 + }, + { + "epoch": 0.20021528525296017, + "loss": 1.0503004789352417, + "loss_ce": 0.0058669159188866615, + "loss_iou": 0.416015625, + "loss_num": 0.042236328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 141655232, + "step": 2139 + }, + { + "epoch": 0.2003088875368559, + "grad_norm": 20.54629898071289, + "learning_rate": 5e-05, + "loss": 1.4583, + "num_input_tokens_seen": 141721376, + "step": 2140 + }, + { + "epoch": 0.2003088875368559, + "loss": 1.5018723011016846, + "loss_ce": 0.0028488750103861094, + "loss_iou": 0.6328125, + "loss_num": 0.0478515625, + "loss_xval": 1.5, + "num_input_tokens_seen": 141721376, + "step": 2140 + }, + { + "epoch": 0.20040248982075162, + "grad_norm": 32.34085464477539, + "learning_rate": 5e-05, + "loss": 1.3375, + "num_input_tokens_seen": 141787640, + "step": 2141 + }, + { + "epoch": 0.20040248982075162, + "loss": 1.2765312194824219, + "loss_ce": 0.00455861771479249, + "loss_iou": 0.515625, + "loss_num": 0.048583984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 141787640, + "step": 2141 + }, + { + "epoch": 0.20049609210464736, + "grad_norm": 45.27389907836914, + "learning_rate": 5e-05, + "loss": 1.5283, + "num_input_tokens_seen": 141853152, + "step": 2142 + }, + { + "epoch": 0.20049609210464736, + "loss": 1.6496539115905762, + "loss_ce": 0.010005595162510872, + "loss_iou": 0.68359375, + "loss_num": 0.054931640625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 141853152, + "step": 2142 + }, + { + "epoch": 0.20058969438854307, + "grad_norm": 14.894622802734375, + "learning_rate": 5e-05, + "loss": 1.2859, + "num_input_tokens_seen": 141919916, + "step": 2143 + }, + { + "epoch": 0.20058969438854307, + "loss": 1.3160537481307983, + "loss_ce": 0.00453033484518528, + "loss_iou": 0.5390625, + "loss_num": 0.046630859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 141919916, + "step": 2143 + }, + { + "epoch": 0.2006832966724388, + "grad_norm": 14.653695106506348, + "learning_rate": 5e-05, + "loss": 1.3589, + "num_input_tokens_seen": 141985588, + "step": 2144 + }, + { + "epoch": 0.2006832966724388, + "loss": 1.351457118988037, + "loss_ce": 0.0033125763293355703, + "loss_iou": 0.5625, + "loss_num": 0.044189453125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 141985588, + "step": 2144 + }, + { + "epoch": 0.20077689895633452, + "grad_norm": 17.91266632080078, + "learning_rate": 5e-05, + "loss": 1.6382, + "num_input_tokens_seen": 142050996, + "step": 2145 + }, + { + "epoch": 0.20077689895633452, + "loss": 1.3563157320022583, + "loss_ce": 0.005241512320935726, + "loss_iou": 0.60546875, + "loss_num": 0.0277099609375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 142050996, + "step": 2145 + }, + { + "epoch": 0.20087050124023026, + "grad_norm": 28.426193237304688, + "learning_rate": 5e-05, + "loss": 1.4209, + "num_input_tokens_seen": 142116196, + "step": 2146 + }, + { + "epoch": 0.20087050124023026, + "loss": 1.5161666870117188, + "loss_ce": 0.00249484833329916, + "loss_iou": 0.625, + "loss_num": 0.052490234375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 142116196, + "step": 2146 + }, + { + "epoch": 0.200964103524126, + "grad_norm": 22.27088165283203, + "learning_rate": 5e-05, + "loss": 1.4221, + "num_input_tokens_seen": 142181700, + "step": 2147 + }, + { + "epoch": 0.200964103524126, + "loss": 1.6393663883209229, + "loss_ce": 0.0041125984862446785, + "loss_iou": 0.6875, + "loss_num": 0.05224609375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 142181700, + "step": 2147 + }, + { + "epoch": 0.2010577058080217, + "grad_norm": 19.2863712310791, + "learning_rate": 5e-05, + "loss": 1.546, + "num_input_tokens_seen": 142249104, + "step": 2148 + }, + { + "epoch": 0.2010577058080217, + "loss": 1.6800668239593506, + "loss_ce": 0.0023324843496084213, + "loss_iou": 0.7265625, + "loss_num": 0.045166015625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 142249104, + "step": 2148 + }, + { + "epoch": 0.20115130809191745, + "grad_norm": 18.831619262695312, + "learning_rate": 5e-05, + "loss": 1.0992, + "num_input_tokens_seen": 142315916, + "step": 2149 + }, + { + "epoch": 0.20115130809191745, + "loss": 1.0366709232330322, + "loss_ce": 0.005420956294983625, + "loss_iou": 0.45703125, + "loss_num": 0.0235595703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 142315916, + "step": 2149 + }, + { + "epoch": 0.20124491037581316, + "grad_norm": 19.836402893066406, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 142382940, + "step": 2150 + }, + { + "epoch": 0.20124491037581316, + "loss": 1.3474340438842773, + "loss_ce": 0.004172265063971281, + "loss_iou": 0.5625, + "loss_num": 0.043701171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 142382940, + "step": 2150 + }, + { + "epoch": 0.2013385126597089, + "grad_norm": 16.515554428100586, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 142449124, + "step": 2151 + }, + { + "epoch": 0.2013385126597089, + "loss": 1.3000141382217407, + "loss_ce": 0.006557063199579716, + "loss_iou": 0.51953125, + "loss_num": 0.051025390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 142449124, + "step": 2151 + }, + { + "epoch": 0.20143211494360463, + "grad_norm": 35.38172149658203, + "learning_rate": 5e-05, + "loss": 1.3323, + "num_input_tokens_seen": 142514168, + "step": 2152 + }, + { + "epoch": 0.20143211494360463, + "loss": 1.446332573890686, + "loss_ce": 0.004560066852718592, + "loss_iou": 0.58203125, + "loss_num": 0.055908203125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 142514168, + "step": 2152 + }, + { + "epoch": 0.20152571722750034, + "grad_norm": 34.99225997924805, + "learning_rate": 5e-05, + "loss": 1.5669, + "num_input_tokens_seen": 142580064, + "step": 2153 + }, + { + "epoch": 0.20152571722750034, + "loss": 1.490980863571167, + "loss_ce": 0.006605847738683224, + "loss_iou": 0.6015625, + "loss_num": 0.056640625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 142580064, + "step": 2153 + }, + { + "epoch": 0.20161931951139608, + "grad_norm": 22.245737075805664, + "learning_rate": 5e-05, + "loss": 1.3902, + "num_input_tokens_seen": 142645520, + "step": 2154 + }, + { + "epoch": 0.20161931951139608, + "loss": 1.0189058780670166, + "loss_ce": 0.007004044950008392, + "loss_iou": 0.4453125, + "loss_num": 0.0247802734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 142645520, + "step": 2154 + }, + { + "epoch": 0.2017129217952918, + "grad_norm": 18.72137451171875, + "learning_rate": 5e-05, + "loss": 1.215, + "num_input_tokens_seen": 142711508, + "step": 2155 + }, + { + "epoch": 0.2017129217952918, + "loss": 1.3505170345306396, + "loss_ce": 0.006278828717768192, + "loss_iou": 0.53515625, + "loss_num": 0.05419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 142711508, + "step": 2155 + }, + { + "epoch": 0.20180652407918753, + "grad_norm": 20.530357360839844, + "learning_rate": 5e-05, + "loss": 1.626, + "num_input_tokens_seen": 142776984, + "step": 2156 + }, + { + "epoch": 0.20180652407918753, + "loss": 1.7002286911010742, + "loss_ce": 0.004427995067089796, + "loss_iou": 0.70703125, + "loss_num": 0.056640625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 142776984, + "step": 2156 + }, + { + "epoch": 0.20190012636308327, + "grad_norm": 22.80133628845215, + "learning_rate": 5e-05, + "loss": 1.4426, + "num_input_tokens_seen": 142843388, + "step": 2157 + }, + { + "epoch": 0.20190012636308327, + "loss": 1.5896942615509033, + "loss_ce": 0.0037568435072898865, + "loss_iou": 0.6640625, + "loss_num": 0.052734375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 142843388, + "step": 2157 + }, + { + "epoch": 0.20199372864697898, + "grad_norm": 23.717363357543945, + "learning_rate": 5e-05, + "loss": 1.4548, + "num_input_tokens_seen": 142910168, + "step": 2158 + }, + { + "epoch": 0.20199372864697898, + "loss": 1.4617071151733398, + "loss_ce": 0.004675748758018017, + "loss_iou": 0.6171875, + "loss_num": 0.044189453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 142910168, + "step": 2158 + }, + { + "epoch": 0.20208733093087472, + "grad_norm": 19.471845626831055, + "learning_rate": 5e-05, + "loss": 1.4269, + "num_input_tokens_seen": 142975132, + "step": 2159 + }, + { + "epoch": 0.20208733093087472, + "loss": 1.4465241432189941, + "loss_ce": 0.004141343291848898, + "loss_iou": 0.61328125, + "loss_num": 0.04296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 142975132, + "step": 2159 + }, + { + "epoch": 0.20218093321477043, + "grad_norm": 29.96786880493164, + "learning_rate": 5e-05, + "loss": 1.4185, + "num_input_tokens_seen": 143042128, + "step": 2160 + }, + { + "epoch": 0.20218093321477043, + "loss": 1.3426908254623413, + "loss_ce": 0.0018704948015511036, + "loss_iou": 0.5703125, + "loss_num": 0.03955078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 143042128, + "step": 2160 + }, + { + "epoch": 0.20227453549866617, + "grad_norm": 39.95287322998047, + "learning_rate": 5e-05, + "loss": 1.344, + "num_input_tokens_seen": 143106544, + "step": 2161 + }, + { + "epoch": 0.20227453549866617, + "loss": 1.298628807067871, + "loss_ce": 0.00175384059548378, + "loss_iou": 0.5625, + "loss_num": 0.034423828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 143106544, + "step": 2161 + }, + { + "epoch": 0.2023681377825619, + "grad_norm": 50.39260482788086, + "learning_rate": 5e-05, + "loss": 1.5972, + "num_input_tokens_seen": 143171920, + "step": 2162 + }, + { + "epoch": 0.2023681377825619, + "loss": 1.5182979106903076, + "loss_ce": 0.0036495246458798647, + "loss_iou": 0.66796875, + "loss_num": 0.035400390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 143171920, + "step": 2162 + }, + { + "epoch": 0.20246174006645762, + "grad_norm": 11.106011390686035, + "learning_rate": 5e-05, + "loss": 1.3537, + "num_input_tokens_seen": 143238752, + "step": 2163 + }, + { + "epoch": 0.20246174006645762, + "loss": 1.4705227613449097, + "loss_ce": 0.004214137326925993, + "loss_iou": 0.6015625, + "loss_num": 0.05322265625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 143238752, + "step": 2163 + }, + { + "epoch": 0.20255534235035336, + "grad_norm": 15.916219711303711, + "learning_rate": 5e-05, + "loss": 1.5475, + "num_input_tokens_seen": 143304276, + "step": 2164 + }, + { + "epoch": 0.20255534235035336, + "loss": 1.6630849838256836, + "loss_ce": 0.00634677754715085, + "loss_iou": 0.66796875, + "loss_num": 0.06396484375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 143304276, + "step": 2164 + }, + { + "epoch": 0.20264894463424907, + "grad_norm": 25.220035552978516, + "learning_rate": 5e-05, + "loss": 1.3673, + "num_input_tokens_seen": 143370060, + "step": 2165 + }, + { + "epoch": 0.20264894463424907, + "loss": 1.121541976928711, + "loss_ce": 0.006185547914355993, + "loss_iou": 0.47265625, + "loss_num": 0.033935546875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 143370060, + "step": 2165 + }, + { + "epoch": 0.2027425469181448, + "grad_norm": 28.483713150024414, + "learning_rate": 5e-05, + "loss": 1.3473, + "num_input_tokens_seen": 143437080, + "step": 2166 + }, + { + "epoch": 0.2027425469181448, + "loss": 1.2359519004821777, + "loss_ce": 0.009877657517790794, + "loss_iou": 0.47265625, + "loss_num": 0.055908203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 143437080, + "step": 2166 + }, + { + "epoch": 0.20283614920204052, + "grad_norm": 25.22542381286621, + "learning_rate": 5e-05, + "loss": 1.4368, + "num_input_tokens_seen": 143503224, + "step": 2167 + }, + { + "epoch": 0.20283614920204052, + "loss": 1.3459038734436035, + "loss_ce": 0.001177316065877676, + "loss_iou": 0.578125, + "loss_num": 0.03857421875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 143503224, + "step": 2167 + }, + { + "epoch": 0.20292975148593626, + "grad_norm": 24.992958068847656, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 143569348, + "step": 2168 + }, + { + "epoch": 0.20292975148593626, + "loss": 1.4888062477111816, + "loss_ce": 0.006384441163390875, + "loss_iou": 0.546875, + "loss_num": 0.0771484375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 143569348, + "step": 2168 + }, + { + "epoch": 0.203023353769832, + "grad_norm": 22.52411651611328, + "learning_rate": 5e-05, + "loss": 1.3275, + "num_input_tokens_seen": 143635772, + "step": 2169 + }, + { + "epoch": 0.203023353769832, + "loss": 1.3241088390350342, + "loss_ce": 0.0030333655886352062, + "loss_iou": 0.57421875, + "loss_num": 0.03515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 143635772, + "step": 2169 + }, + { + "epoch": 0.2031169560537277, + "grad_norm": 42.44329833984375, + "learning_rate": 5e-05, + "loss": 1.4694, + "num_input_tokens_seen": 143700864, + "step": 2170 + }, + { + "epoch": 0.2031169560537277, + "loss": 1.4569463729858398, + "loss_ce": 0.006751062348484993, + "loss_iou": 0.61328125, + "loss_num": 0.045166015625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 143700864, + "step": 2170 + }, + { + "epoch": 0.20321055833762344, + "grad_norm": 22.192626953125, + "learning_rate": 5e-05, + "loss": 1.2989, + "num_input_tokens_seen": 143767608, + "step": 2171 + }, + { + "epoch": 0.20321055833762344, + "loss": 1.1069200038909912, + "loss_ce": 0.0029160603880882263, + "loss_iou": 0.48046875, + "loss_num": 0.0284423828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 143767608, + "step": 2171 + }, + { + "epoch": 0.20330416062151915, + "grad_norm": 35.91387939453125, + "learning_rate": 5e-05, + "loss": 1.4401, + "num_input_tokens_seen": 143834088, + "step": 2172 + }, + { + "epoch": 0.20330416062151915, + "loss": 1.5390979051589966, + "loss_ce": 0.002965106163173914, + "loss_iou": 0.671875, + "loss_num": 0.038818359375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 143834088, + "step": 2172 + }, + { + "epoch": 0.2033977629054149, + "grad_norm": 21.08890724182129, + "learning_rate": 5e-05, + "loss": 1.6687, + "num_input_tokens_seen": 143899788, + "step": 2173 + }, + { + "epoch": 0.2033977629054149, + "loss": 1.4316294193267822, + "loss_ce": 0.002918568905442953, + "loss_iou": 0.6328125, + "loss_num": 0.032958984375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 143899788, + "step": 2173 + }, + { + "epoch": 0.20349136518931063, + "grad_norm": 22.691059112548828, + "learning_rate": 5e-05, + "loss": 1.3941, + "num_input_tokens_seen": 143966944, + "step": 2174 + }, + { + "epoch": 0.20349136518931063, + "loss": 1.5247730016708374, + "loss_ce": 0.004265276715159416, + "loss_iou": 0.65625, + "loss_num": 0.041748046875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 143966944, + "step": 2174 + }, + { + "epoch": 0.20358496747320634, + "grad_norm": 42.85596466064453, + "learning_rate": 5e-05, + "loss": 1.2111, + "num_input_tokens_seen": 144033116, + "step": 2175 + }, + { + "epoch": 0.20358496747320634, + "loss": 1.2911250591278076, + "loss_ce": 0.0025508119724690914, + "loss_iou": 0.546875, + "loss_num": 0.039306640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 144033116, + "step": 2175 + }, + { + "epoch": 0.20367856975710208, + "grad_norm": 30.698877334594727, + "learning_rate": 5e-05, + "loss": 1.5079, + "num_input_tokens_seen": 144099212, + "step": 2176 + }, + { + "epoch": 0.20367856975710208, + "loss": 1.4964704513549805, + "loss_ce": 0.008189158514142036, + "loss_iou": 0.61328125, + "loss_num": 0.052734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 144099212, + "step": 2176 + }, + { + "epoch": 0.2037721720409978, + "grad_norm": 16.635744094848633, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 144165616, + "step": 2177 + }, + { + "epoch": 0.2037721720409978, + "loss": 1.2867045402526855, + "loss_ce": 0.005454451777040958, + "loss_iou": 0.5546875, + "loss_num": 0.03515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 144165616, + "step": 2177 + }, + { + "epoch": 0.20386577432489353, + "grad_norm": 21.607072830200195, + "learning_rate": 5e-05, + "loss": 1.621, + "num_input_tokens_seen": 144232660, + "step": 2178 + }, + { + "epoch": 0.20386577432489353, + "loss": 1.3934295177459717, + "loss_ce": 0.008663848042488098, + "loss_iou": 0.609375, + "loss_num": 0.03369140625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 144232660, + "step": 2178 + }, + { + "epoch": 0.20395937660878927, + "grad_norm": 32.10531997680664, + "learning_rate": 5e-05, + "loss": 1.4021, + "num_input_tokens_seen": 144299392, + "step": 2179 + }, + { + "epoch": 0.20395937660878927, + "loss": 1.331843376159668, + "loss_ce": 0.0056714159436523914, + "loss_iou": 0.6015625, + "loss_num": 0.0240478515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 144299392, + "step": 2179 + }, + { + "epoch": 0.20405297889268498, + "grad_norm": 21.30291175842285, + "learning_rate": 5e-05, + "loss": 1.7013, + "num_input_tokens_seen": 144365804, + "step": 2180 + }, + { + "epoch": 0.20405297889268498, + "loss": 1.5601857900619507, + "loss_ce": 0.004521731752902269, + "loss_iou": 0.640625, + "loss_num": 0.05517578125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 144365804, + "step": 2180 + }, + { + "epoch": 0.20414658117658072, + "grad_norm": 22.230737686157227, + "learning_rate": 5e-05, + "loss": 1.3447, + "num_input_tokens_seen": 144432600, + "step": 2181 + }, + { + "epoch": 0.20414658117658072, + "loss": 1.4684849977493286, + "loss_ce": 0.008035789243876934, + "loss_iou": 0.5859375, + "loss_num": 0.056884765625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 144432600, + "step": 2181 + }, + { + "epoch": 0.20424018346047643, + "grad_norm": 20.775821685791016, + "learning_rate": 5e-05, + "loss": 1.235, + "num_input_tokens_seen": 144498544, + "step": 2182 + }, + { + "epoch": 0.20424018346047643, + "loss": 1.1361083984375, + "loss_ce": 0.004028338938951492, + "loss_iou": 0.455078125, + "loss_num": 0.044677734375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 144498544, + "step": 2182 + }, + { + "epoch": 0.20433378574437217, + "grad_norm": 18.58045196533203, + "learning_rate": 5e-05, + "loss": 1.6382, + "num_input_tokens_seen": 144565964, + "step": 2183 + }, + { + "epoch": 0.20433378574437217, + "loss": 1.5659629106521606, + "loss_ce": 0.005904317833483219, + "loss_iou": 0.69140625, + "loss_num": 0.035888671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 144565964, + "step": 2183 + }, + { + "epoch": 0.2044273880282679, + "grad_norm": 38.042015075683594, + "learning_rate": 5e-05, + "loss": 1.5261, + "num_input_tokens_seen": 144631488, + "step": 2184 + }, + { + "epoch": 0.2044273880282679, + "loss": 1.412172555923462, + "loss_ce": 0.004945940803736448, + "loss_iou": 0.59375, + "loss_num": 0.043212890625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 144631488, + "step": 2184 + }, + { + "epoch": 0.20452099031216361, + "grad_norm": 17.624834060668945, + "learning_rate": 5e-05, + "loss": 1.4384, + "num_input_tokens_seen": 144698352, + "step": 2185 + }, + { + "epoch": 0.20452099031216361, + "loss": 1.404240369796753, + "loss_ce": 0.004826275631785393, + "loss_iou": 0.57421875, + "loss_num": 0.05029296875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 144698352, + "step": 2185 + }, + { + "epoch": 0.20461459259605935, + "grad_norm": 46.20594787597656, + "learning_rate": 5e-05, + "loss": 1.4092, + "num_input_tokens_seen": 144765108, + "step": 2186 + }, + { + "epoch": 0.20461459259605935, + "loss": 1.4681873321533203, + "loss_ce": 0.005785033572465181, + "loss_iou": 0.60546875, + "loss_num": 0.0498046875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 144765108, + "step": 2186 + }, + { + "epoch": 0.20470819487995506, + "grad_norm": 17.88568878173828, + "learning_rate": 5e-05, + "loss": 1.5187, + "num_input_tokens_seen": 144830616, + "step": 2187 + }, + { + "epoch": 0.20470819487995506, + "loss": 1.8436431884765625, + "loss_ce": 0.010635368525981903, + "loss_iou": 0.76171875, + "loss_num": 0.0625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 144830616, + "step": 2187 + }, + { + "epoch": 0.2048017971638508, + "grad_norm": 37.66646957397461, + "learning_rate": 5e-05, + "loss": 1.5437, + "num_input_tokens_seen": 144897316, + "step": 2188 + }, + { + "epoch": 0.2048017971638508, + "loss": 1.565699815750122, + "loss_ce": 0.002711482811719179, + "loss_iou": 0.62109375, + "loss_num": 0.06396484375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 144897316, + "step": 2188 + }, + { + "epoch": 0.2048953994477465, + "grad_norm": 39.91148376464844, + "learning_rate": 5e-05, + "loss": 1.4788, + "num_input_tokens_seen": 144962820, + "step": 2189 + }, + { + "epoch": 0.2048953994477465, + "loss": 1.3915303945541382, + "loss_ce": 0.0038351090624928474, + "loss_iou": 0.56640625, + "loss_num": 0.051025390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 144962820, + "step": 2189 + }, + { + "epoch": 0.20498900173164225, + "grad_norm": 33.69110107421875, + "learning_rate": 5e-05, + "loss": 1.6458, + "num_input_tokens_seen": 145030904, + "step": 2190 + }, + { + "epoch": 0.20498900173164225, + "loss": 1.5528013706207275, + "loss_ce": 0.002996806986629963, + "loss_iou": 0.671875, + "loss_num": 0.04052734375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 145030904, + "step": 2190 + }, + { + "epoch": 0.205082604015538, + "grad_norm": 16.49057388305664, + "learning_rate": 5e-05, + "loss": 1.3206, + "num_input_tokens_seen": 145097788, + "step": 2191 + }, + { + "epoch": 0.205082604015538, + "loss": 1.3586344718933105, + "loss_ce": 0.0031656406354159117, + "loss_iou": 0.5625, + "loss_num": 0.047119140625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 145097788, + "step": 2191 + }, + { + "epoch": 0.2051762062994337, + "grad_norm": 27.333337783813477, + "learning_rate": 5e-05, + "loss": 1.415, + "num_input_tokens_seen": 145163536, + "step": 2192 + }, + { + "epoch": 0.2051762062994337, + "loss": 1.326690912246704, + "loss_ce": 0.004913484677672386, + "loss_iou": 0.5546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 145163536, + "step": 2192 + }, + { + "epoch": 0.20526980858332944, + "grad_norm": 20.88427734375, + "learning_rate": 5e-05, + "loss": 1.5599, + "num_input_tokens_seen": 145228784, + "step": 2193 + }, + { + "epoch": 0.20526980858332944, + "loss": 1.6858747005462646, + "loss_ce": 0.009116966277360916, + "loss_iou": 0.71875, + "loss_num": 0.048583984375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 145228784, + "step": 2193 + }, + { + "epoch": 0.20536341086722515, + "grad_norm": 19.816139221191406, + "learning_rate": 5e-05, + "loss": 1.1869, + "num_input_tokens_seen": 145295832, + "step": 2194 + }, + { + "epoch": 0.20536341086722515, + "loss": 1.0105786323547363, + "loss_ce": 0.004841390997171402, + "loss_iou": 0.42578125, + "loss_num": 0.0301513671875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 145295832, + "step": 2194 + }, + { + "epoch": 0.2054570131511209, + "grad_norm": 21.31490135192871, + "learning_rate": 5e-05, + "loss": 1.455, + "num_input_tokens_seen": 145363284, + "step": 2195 + }, + { + "epoch": 0.2054570131511209, + "loss": 1.525984287261963, + "loss_ce": 0.004499888978898525, + "loss_iou": 0.6328125, + "loss_num": 0.051513671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 145363284, + "step": 2195 + }, + { + "epoch": 0.20555061543501663, + "grad_norm": 16.395980834960938, + "learning_rate": 5e-05, + "loss": 1.4608, + "num_input_tokens_seen": 145428900, + "step": 2196 + }, + { + "epoch": 0.20555061543501663, + "loss": 1.323909044265747, + "loss_ce": 0.003962744493037462, + "loss_iou": 0.53515625, + "loss_num": 0.050048828125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 145428900, + "step": 2196 + }, + { + "epoch": 0.20564421771891234, + "grad_norm": 33.9736442565918, + "learning_rate": 5e-05, + "loss": 1.3005, + "num_input_tokens_seen": 145494208, + "step": 2197 + }, + { + "epoch": 0.20564421771891234, + "loss": 1.2018710374832153, + "loss_ce": 0.005581974517554045, + "loss_iou": 0.48828125, + "loss_num": 0.0439453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 145494208, + "step": 2197 + }, + { + "epoch": 0.20573782000280808, + "grad_norm": 32.992347717285156, + "learning_rate": 5e-05, + "loss": 1.6301, + "num_input_tokens_seen": 145559440, + "step": 2198 + }, + { + "epoch": 0.20573782000280808, + "loss": 1.7665610313415527, + "loss_ce": 0.003865695558488369, + "loss_iou": 0.74609375, + "loss_num": 0.0537109375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 145559440, + "step": 2198 + }, + { + "epoch": 0.20583142228670379, + "grad_norm": 19.87616539001465, + "learning_rate": 5e-05, + "loss": 1.2179, + "num_input_tokens_seen": 145625060, + "step": 2199 + }, + { + "epoch": 0.20583142228670379, + "loss": 1.1124231815338135, + "loss_ce": 0.003902629017829895, + "loss_iou": 0.46875, + "loss_num": 0.034423828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 145625060, + "step": 2199 + }, + { + "epoch": 0.20592502457059952, + "grad_norm": 16.862716674804688, + "learning_rate": 5e-05, + "loss": 1.1606, + "num_input_tokens_seen": 145691548, + "step": 2200 + }, + { + "epoch": 0.20592502457059952, + "loss": 1.068557858467102, + "loss_ce": 0.006057845428586006, + "loss_iou": 0.412109375, + "loss_num": 0.047607421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 145691548, + "step": 2200 + }, + { + "epoch": 0.20601862685449526, + "grad_norm": 16.87476921081543, + "learning_rate": 5e-05, + "loss": 1.2487, + "num_input_tokens_seen": 145758132, + "step": 2201 + }, + { + "epoch": 0.20601862685449526, + "loss": 1.3122081756591797, + "loss_ce": 0.004102656617760658, + "loss_iou": 0.5546875, + "loss_num": 0.039794921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 145758132, + "step": 2201 + }, + { + "epoch": 0.20611222913839097, + "grad_norm": 35.36105728149414, + "learning_rate": 5e-05, + "loss": 1.2631, + "num_input_tokens_seen": 145824952, + "step": 2202 + }, + { + "epoch": 0.20611222913839097, + "loss": 1.2746992111206055, + "loss_ce": 0.003703146940097213, + "loss_iou": 0.51953125, + "loss_num": 0.0458984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 145824952, + "step": 2202 + }, + { + "epoch": 0.2062058314222867, + "grad_norm": 20.086915969848633, + "learning_rate": 5e-05, + "loss": 1.5068, + "num_input_tokens_seen": 145890460, + "step": 2203 + }, + { + "epoch": 0.2062058314222867, + "loss": 1.3482638597488403, + "loss_ce": 0.010617414489388466, + "loss_iou": 0.5546875, + "loss_num": 0.0458984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 145890460, + "step": 2203 + }, + { + "epoch": 0.20629943370618242, + "grad_norm": 18.681596755981445, + "learning_rate": 5e-05, + "loss": 1.4599, + "num_input_tokens_seen": 145956848, + "step": 2204 + }, + { + "epoch": 0.20629943370618242, + "loss": 1.3786115646362305, + "loss_ce": 0.0060529257170856, + "loss_iou": 0.578125, + "loss_num": 0.04296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 145956848, + "step": 2204 + }, + { + "epoch": 0.20639303599007816, + "grad_norm": 16.152690887451172, + "learning_rate": 5e-05, + "loss": 1.4318, + "num_input_tokens_seen": 146023292, + "step": 2205 + }, + { + "epoch": 0.20639303599007816, + "loss": 1.682483434677124, + "loss_ce": 0.0037725758738815784, + "loss_iou": 0.671875, + "loss_num": 0.0673828125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 146023292, + "step": 2205 + }, + { + "epoch": 0.20648663827397387, + "grad_norm": 15.745396614074707, + "learning_rate": 5e-05, + "loss": 1.6109, + "num_input_tokens_seen": 146089560, + "step": 2206 + }, + { + "epoch": 0.20648663827397387, + "loss": 1.5467182397842407, + "loss_ce": 0.006190854590386152, + "loss_iou": 0.609375, + "loss_num": 0.064453125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 146089560, + "step": 2206 + }, + { + "epoch": 0.2065802405578696, + "grad_norm": 31.07707977294922, + "learning_rate": 5e-05, + "loss": 1.2601, + "num_input_tokens_seen": 146155592, + "step": 2207 + }, + { + "epoch": 0.2065802405578696, + "loss": 1.3511521816253662, + "loss_ce": 0.007402233779430389, + "loss_iou": 0.53515625, + "loss_num": 0.0546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 146155592, + "step": 2207 + }, + { + "epoch": 0.20667384284176535, + "grad_norm": 23.022127151489258, + "learning_rate": 5e-05, + "loss": 1.4658, + "num_input_tokens_seen": 146222144, + "step": 2208 + }, + { + "epoch": 0.20667384284176535, + "loss": 1.1982890367507935, + "loss_ce": 0.007371045649051666, + "loss_iou": 0.5234375, + "loss_num": 0.0283203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 146222144, + "step": 2208 + }, + { + "epoch": 0.20676744512566106, + "grad_norm": 33.60017013549805, + "learning_rate": 5e-05, + "loss": 1.457, + "num_input_tokens_seen": 146289488, + "step": 2209 + }, + { + "epoch": 0.20676744512566106, + "loss": 1.6218219995498657, + "loss_ce": 0.004146199207752943, + "loss_iou": 0.65234375, + "loss_num": 0.0625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 146289488, + "step": 2209 + }, + { + "epoch": 0.2068610474095568, + "grad_norm": 25.5781192779541, + "learning_rate": 5e-05, + "loss": 1.6717, + "num_input_tokens_seen": 146355076, + "step": 2210 + }, + { + "epoch": 0.2068610474095568, + "loss": 1.6904634237289429, + "loss_ce": 0.0039400034584105015, + "loss_iou": 0.71484375, + "loss_num": 0.051025390625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 146355076, + "step": 2210 + }, + { + "epoch": 0.2069546496934525, + "grad_norm": 10.948907852172852, + "learning_rate": 5e-05, + "loss": 1.238, + "num_input_tokens_seen": 146420240, + "step": 2211 + }, + { + "epoch": 0.2069546496934525, + "loss": 1.414231300354004, + "loss_ce": 0.007981323637068272, + "loss_iou": 0.5234375, + "loss_num": 0.07177734375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 146420240, + "step": 2211 + }, + { + "epoch": 0.20704825197734825, + "grad_norm": 20.03610610961914, + "learning_rate": 5e-05, + "loss": 1.4347, + "num_input_tokens_seen": 146486432, + "step": 2212 + }, + { + "epoch": 0.20704825197734825, + "loss": 1.4449725151062012, + "loss_ce": 0.004542850889265537, + "loss_iou": 0.57421875, + "loss_num": 0.0576171875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 146486432, + "step": 2212 + }, + { + "epoch": 0.20714185426124399, + "grad_norm": 15.121575355529785, + "learning_rate": 5e-05, + "loss": 1.3037, + "num_input_tokens_seen": 146551664, + "step": 2213 + }, + { + "epoch": 0.20714185426124399, + "loss": 1.372945785522461, + "loss_ce": 0.008047164417803288, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 146551664, + "step": 2213 + }, + { + "epoch": 0.2072354565451397, + "grad_norm": 21.025821685791016, + "learning_rate": 5e-05, + "loss": 1.2563, + "num_input_tokens_seen": 146618556, + "step": 2214 + }, + { + "epoch": 0.2072354565451397, + "loss": 1.1853673458099365, + "loss_ce": 0.0037267024163156748, + "loss_iou": 0.5078125, + "loss_num": 0.032470703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 146618556, + "step": 2214 + }, + { + "epoch": 0.20732905882903543, + "grad_norm": 27.402463912963867, + "learning_rate": 5e-05, + "loss": 1.6821, + "num_input_tokens_seen": 146684372, + "step": 2215 + }, + { + "epoch": 0.20732905882903543, + "loss": 1.7625792026519775, + "loss_ce": 0.006719916593283415, + "loss_iou": 0.73046875, + "loss_num": 0.05859375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 146684372, + "step": 2215 + }, + { + "epoch": 0.20742266111293114, + "grad_norm": 20.352569580078125, + "learning_rate": 5e-05, + "loss": 1.227, + "num_input_tokens_seen": 146750312, + "step": 2216 + }, + { + "epoch": 0.20742266111293114, + "loss": 1.1586307287216187, + "loss_ce": 0.004822149872779846, + "loss_iou": 0.5234375, + "loss_num": 0.02197265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 146750312, + "step": 2216 + }, + { + "epoch": 0.20751626339682688, + "grad_norm": 20.267181396484375, + "learning_rate": 5e-05, + "loss": 1.4477, + "num_input_tokens_seen": 146816936, + "step": 2217 + }, + { + "epoch": 0.20751626339682688, + "loss": 1.2870581150054932, + "loss_ce": 0.007761204615235329, + "loss_iou": 0.5546875, + "loss_num": 0.03466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 146816936, + "step": 2217 + }, + { + "epoch": 0.20760986568072262, + "grad_norm": 16.633872985839844, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 146883120, + "step": 2218 + }, + { + "epoch": 0.20760986568072262, + "loss": 1.0969173908233643, + "loss_ce": 0.0026791575364768505, + "loss_iou": 0.4296875, + "loss_num": 0.046875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 146883120, + "step": 2218 + }, + { + "epoch": 0.20770346796461833, + "grad_norm": 14.718212127685547, + "learning_rate": 5e-05, + "loss": 1.3418, + "num_input_tokens_seen": 146949108, + "step": 2219 + }, + { + "epoch": 0.20770346796461833, + "loss": 1.2713162899017334, + "loss_ce": 0.005691338796168566, + "loss_iou": 0.4921875, + "loss_num": 0.055908203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 146949108, + "step": 2219 + }, + { + "epoch": 0.20779707024851407, + "grad_norm": 16.255435943603516, + "learning_rate": 5e-05, + "loss": 1.2042, + "num_input_tokens_seen": 147015480, + "step": 2220 + }, + { + "epoch": 0.20779707024851407, + "loss": 1.3084746599197388, + "loss_ce": 0.0037871471140533686, + "loss_iou": 0.52734375, + "loss_num": 0.049560546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 147015480, + "step": 2220 + }, + { + "epoch": 0.20789067253240978, + "grad_norm": 18.149707794189453, + "learning_rate": 5e-05, + "loss": 1.3916, + "num_input_tokens_seen": 147081252, + "step": 2221 + }, + { + "epoch": 0.20789067253240978, + "loss": 1.2870043516159058, + "loss_ce": 0.010637231171131134, + "loss_iou": 0.51171875, + "loss_num": 0.05029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 147081252, + "step": 2221 + }, + { + "epoch": 0.20798427481630552, + "grad_norm": 15.424684524536133, + "learning_rate": 5e-05, + "loss": 1.2039, + "num_input_tokens_seen": 147148376, + "step": 2222 + }, + { + "epoch": 0.20798427481630552, + "loss": 0.9300609827041626, + "loss_ce": 0.002570751588791609, + "loss_iou": 0.388671875, + "loss_num": 0.0301513671875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 147148376, + "step": 2222 + }, + { + "epoch": 0.20807787710020126, + "grad_norm": 23.329143524169922, + "learning_rate": 5e-05, + "loss": 1.2619, + "num_input_tokens_seen": 147214676, + "step": 2223 + }, + { + "epoch": 0.20807787710020126, + "loss": 1.3454594612121582, + "loss_ce": 0.0046391854993999004, + "loss_iou": 0.5859375, + "loss_num": 0.0341796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 147214676, + "step": 2223 + }, + { + "epoch": 0.20817147938409697, + "grad_norm": 22.675811767578125, + "learning_rate": 5e-05, + "loss": 1.541, + "num_input_tokens_seen": 147281548, + "step": 2224 + }, + { + "epoch": 0.20817147938409697, + "loss": 1.6554572582244873, + "loss_ce": 0.004090028814971447, + "loss_iou": 0.72265625, + "loss_num": 0.040771484375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 147281548, + "step": 2224 + }, + { + "epoch": 0.2082650816679927, + "grad_norm": 89.36438751220703, + "learning_rate": 5e-05, + "loss": 1.4125, + "num_input_tokens_seen": 147348384, + "step": 2225 + }, + { + "epoch": 0.2082650816679927, + "loss": 1.6650135517120361, + "loss_ce": 0.009740199893712997, + "loss_iou": 0.68359375, + "loss_num": 0.05712890625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 147348384, + "step": 2225 + }, + { + "epoch": 0.20835868395188842, + "grad_norm": 84.10919952392578, + "learning_rate": 5e-05, + "loss": 1.5184, + "num_input_tokens_seen": 147414920, + "step": 2226 + }, + { + "epoch": 0.20835868395188842, + "loss": 1.56592857837677, + "loss_ce": 0.0073348539881408215, + "loss_iou": 0.6875, + "loss_num": 0.036865234375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 147414920, + "step": 2226 + }, + { + "epoch": 0.20845228623578416, + "grad_norm": 14.97479248046875, + "learning_rate": 5e-05, + "loss": 1.441, + "num_input_tokens_seen": 147480396, + "step": 2227 + }, + { + "epoch": 0.20845228623578416, + "loss": 1.5814578533172607, + "loss_ce": 0.007239131256937981, + "loss_iou": 0.60546875, + "loss_num": 0.07275390625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 147480396, + "step": 2227 + }, + { + "epoch": 0.20854588851967987, + "grad_norm": 40.541664123535156, + "learning_rate": 5e-05, + "loss": 1.4666, + "num_input_tokens_seen": 147546120, + "step": 2228 + }, + { + "epoch": 0.20854588851967987, + "loss": 1.5482265949249268, + "loss_ce": 0.004281272180378437, + "loss_iou": 0.62890625, + "loss_num": 0.05712890625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 147546120, + "step": 2228 + }, + { + "epoch": 0.2086394908035756, + "grad_norm": 34.50336456298828, + "learning_rate": 5e-05, + "loss": 1.508, + "num_input_tokens_seen": 147612456, + "step": 2229 + }, + { + "epoch": 0.2086394908035756, + "loss": 1.5039300918579102, + "loss_ce": 0.0049066124483942986, + "loss_iou": 0.59765625, + "loss_num": 0.061279296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 147612456, + "step": 2229 + }, + { + "epoch": 0.20873309308747134, + "grad_norm": 23.92495346069336, + "learning_rate": 5e-05, + "loss": 1.3672, + "num_input_tokens_seen": 147678372, + "step": 2230 + }, + { + "epoch": 0.20873309308747134, + "loss": 1.2686617374420166, + "loss_ce": 0.008163605816662312, + "loss_iou": 0.54296875, + "loss_num": 0.035400390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 147678372, + "step": 2230 + }, + { + "epoch": 0.20882669537136705, + "grad_norm": 54.392547607421875, + "learning_rate": 5e-05, + "loss": 1.6349, + "num_input_tokens_seen": 147744796, + "step": 2231 + }, + { + "epoch": 0.20882669537136705, + "loss": 1.6422569751739502, + "loss_ce": 0.00944453664124012, + "loss_iou": 0.6875, + "loss_num": 0.0517578125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 147744796, + "step": 2231 + }, + { + "epoch": 0.2089202976552628, + "grad_norm": 20.766681671142578, + "learning_rate": 5e-05, + "loss": 1.8613, + "num_input_tokens_seen": 147811408, + "step": 2232 + }, + { + "epoch": 0.2089202976552628, + "loss": 1.9362826347351074, + "loss_ce": 0.00464208796620369, + "loss_iou": 0.8125, + "loss_num": 0.06201171875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 147811408, + "step": 2232 + }, + { + "epoch": 0.2090138999391585, + "grad_norm": 56.70877456665039, + "learning_rate": 5e-05, + "loss": 1.3871, + "num_input_tokens_seen": 147877692, + "step": 2233 + }, + { + "epoch": 0.2090138999391585, + "loss": 1.3354027271270752, + "loss_ce": 0.004348025657236576, + "loss_iou": 0.5703125, + "loss_num": 0.0380859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 147877692, + "step": 2233 + }, + { + "epoch": 0.20910750222305424, + "grad_norm": 34.47943115234375, + "learning_rate": 5e-05, + "loss": 1.5514, + "num_input_tokens_seen": 147943848, + "step": 2234 + }, + { + "epoch": 0.20910750222305424, + "loss": 1.5931035280227661, + "loss_ce": 0.006677714176476002, + "loss_iou": 0.63671875, + "loss_num": 0.06298828125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 147943848, + "step": 2234 + }, + { + "epoch": 0.20920110450694998, + "grad_norm": 18.903854370117188, + "learning_rate": 5e-05, + "loss": 1.8535, + "num_input_tokens_seen": 148010672, + "step": 2235 + }, + { + "epoch": 0.20920110450694998, + "loss": 1.7717020511627197, + "loss_ce": 0.010959910228848457, + "loss_iou": 0.75390625, + "loss_num": 0.051513671875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 148010672, + "step": 2235 + }, + { + "epoch": 0.2092947067908457, + "grad_norm": 52.18496322631836, + "learning_rate": 5e-05, + "loss": 1.3746, + "num_input_tokens_seen": 148077216, + "step": 2236 + }, + { + "epoch": 0.2092947067908457, + "loss": 1.3000686168670654, + "loss_ce": 0.005634992383420467, + "loss_iou": 0.5234375, + "loss_num": 0.049072265625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 148077216, + "step": 2236 + }, + { + "epoch": 0.20938830907474143, + "grad_norm": 65.09969329833984, + "learning_rate": 5e-05, + "loss": 1.4046, + "num_input_tokens_seen": 148143628, + "step": 2237 + }, + { + "epoch": 0.20938830907474143, + "loss": 1.4511845111846924, + "loss_ce": 0.007336852140724659, + "loss_iou": 0.5625, + "loss_num": 0.0634765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 148143628, + "step": 2237 + }, + { + "epoch": 0.20948191135863714, + "grad_norm": 41.23759078979492, + "learning_rate": 5e-05, + "loss": 1.6012, + "num_input_tokens_seen": 148210352, + "step": 2238 + }, + { + "epoch": 0.20948191135863714, + "loss": 1.45740807056427, + "loss_ce": 0.0033065066672861576, + "loss_iou": 0.61328125, + "loss_num": 0.044677734375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 148210352, + "step": 2238 + }, + { + "epoch": 0.20957551364253288, + "grad_norm": 35.63227844238281, + "learning_rate": 5e-05, + "loss": 1.5818, + "num_input_tokens_seen": 148277092, + "step": 2239 + }, + { + "epoch": 0.20957551364253288, + "loss": 1.6763197183609009, + "loss_ce": 0.008350997231900692, + "loss_iou": 0.67578125, + "loss_num": 0.06396484375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 148277092, + "step": 2239 + }, + { + "epoch": 0.20966911592642862, + "grad_norm": 24.028881072998047, + "learning_rate": 5e-05, + "loss": 1.6909, + "num_input_tokens_seen": 148343912, + "step": 2240 + }, + { + "epoch": 0.20966911592642862, + "loss": 1.7737795114517212, + "loss_ce": 0.002295173704624176, + "loss_iou": 0.76953125, + "loss_num": 0.04638671875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 148343912, + "step": 2240 + }, + { + "epoch": 0.20976271821032433, + "grad_norm": 38.34393310546875, + "learning_rate": 5e-05, + "loss": 1.5307, + "num_input_tokens_seen": 148409256, + "step": 2241 + }, + { + "epoch": 0.20976271821032433, + "loss": 1.6319129467010498, + "loss_ce": 0.003983210772275925, + "loss_iou": 0.671875, + "loss_num": 0.056884765625, + "loss_xval": 1.625, + "num_input_tokens_seen": 148409256, + "step": 2241 + }, + { + "epoch": 0.20985632049422007, + "grad_norm": 36.2247314453125, + "learning_rate": 5e-05, + "loss": 1.4455, + "num_input_tokens_seen": 148474748, + "step": 2242 + }, + { + "epoch": 0.20985632049422007, + "loss": 1.538226842880249, + "loss_ce": 0.005512088071554899, + "loss_iou": 0.6171875, + "loss_num": 0.06005859375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 148474748, + "step": 2242 + }, + { + "epoch": 0.20994992277811578, + "grad_norm": 19.64275550842285, + "learning_rate": 5e-05, + "loss": 1.6078, + "num_input_tokens_seen": 148541100, + "step": 2243 + }, + { + "epoch": 0.20994992277811578, + "loss": 1.5026966333389282, + "loss_ce": 0.007579442113637924, + "loss_iou": 0.65234375, + "loss_num": 0.03857421875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 148541100, + "step": 2243 + }, + { + "epoch": 0.21004352506201152, + "grad_norm": 14.244067192077637, + "learning_rate": 5e-05, + "loss": 1.2634, + "num_input_tokens_seen": 148607516, + "step": 2244 + }, + { + "epoch": 0.21004352506201152, + "loss": 1.3142645359039307, + "loss_ce": 0.0042059896513819695, + "loss_iou": 0.5546875, + "loss_num": 0.040771484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 148607516, + "step": 2244 + }, + { + "epoch": 0.21013712734590723, + "grad_norm": 22.575939178466797, + "learning_rate": 5e-05, + "loss": 1.2309, + "num_input_tokens_seen": 148673148, + "step": 2245 + }, + { + "epoch": 0.21013712734590723, + "loss": 1.457908272743225, + "loss_ce": 0.0028301426209509373, + "loss_iou": 0.6015625, + "loss_num": 0.050048828125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 148673148, + "step": 2245 + }, + { + "epoch": 0.21023072962980296, + "grad_norm": 23.176837921142578, + "learning_rate": 5e-05, + "loss": 1.3858, + "num_input_tokens_seen": 148739024, + "step": 2246 + }, + { + "epoch": 0.21023072962980296, + "loss": 1.5564404726028442, + "loss_ce": 0.002241305308416486, + "loss_iou": 0.66015625, + "loss_num": 0.04638671875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 148739024, + "step": 2246 + }, + { + "epoch": 0.2103243319136987, + "grad_norm": 78.06816864013672, + "learning_rate": 5e-05, + "loss": 1.4197, + "num_input_tokens_seen": 148805784, + "step": 2247 + }, + { + "epoch": 0.2103243319136987, + "loss": 1.2678524255752563, + "loss_ce": 0.005645415745675564, + "loss_iou": 0.54296875, + "loss_num": 0.03515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 148805784, + "step": 2247 + }, + { + "epoch": 0.21041793419759441, + "grad_norm": 18.7746639251709, + "learning_rate": 5e-05, + "loss": 1.7969, + "num_input_tokens_seen": 148872216, + "step": 2248 + }, + { + "epoch": 0.21041793419759441, + "loss": 1.6653659343719482, + "loss_ce": 0.0017916755750775337, + "loss_iou": 0.69921875, + "loss_num": 0.053466796875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 148872216, + "step": 2248 + }, + { + "epoch": 0.21051153648149015, + "grad_norm": 25.165340423583984, + "learning_rate": 5e-05, + "loss": 1.4906, + "num_input_tokens_seen": 148938992, + "step": 2249 + }, + { + "epoch": 0.21051153648149015, + "loss": 1.5340664386749268, + "loss_ce": 0.004769505932927132, + "loss_iou": 0.6171875, + "loss_num": 0.05908203125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 148938992, + "step": 2249 + }, + { + "epoch": 0.21060513876538586, + "grad_norm": 25.190000534057617, + "learning_rate": 5e-05, + "loss": 1.5433, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "eval_seeclick_CIoU": 0.1746026836335659, + "eval_seeclick_GIoU": 0.18487906455993652, + "eval_seeclick_IoU": 0.27577562630176544, + "eval_seeclick_MAE_all": 0.14121928066015244, + "eval_seeclick_MAE_h": 0.0973590612411499, + "eval_seeclick_MAE_w": 0.09571099653840065, + "eval_seeclick_MAE_x_boxes": 0.21197088062763214, + "eval_seeclick_MAE_y_boxes": 0.09960607439279556, + "eval_seeclick_NUM_probability": 0.9994820952415466, + "eval_seeclick_inside_bbox": 0.4541666805744171, + "eval_seeclick_loss": 2.4197990894317627, + "eval_seeclick_loss_ce": 0.013916734606027603, + "eval_seeclick_loss_iou": 0.8665771484375, + "eval_seeclick_loss_num": 0.14796829223632812, + "eval_seeclick_loss_xval": 2.47119140625, + "eval_seeclick_runtime": 62.9995, + "eval_seeclick_samples_per_second": 0.746, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "eval_icons_CIoU": -0.15117041021585464, + "eval_icons_GIoU": -0.06868918985128403, + "eval_icons_IoU": 0.052425259724259377, + "eval_icons_MAE_all": 0.20415173470973969, + "eval_icons_MAE_h": 0.22264737635850906, + "eval_icons_MAE_w": 0.18965063244104385, + "eval_icons_MAE_x_boxes": 0.12681209295988083, + "eval_icons_MAE_y_boxes": 0.1221475638449192, + "eval_icons_NUM_probability": 0.9999066889286041, + "eval_icons_inside_bbox": 0.1145833358168602, + "eval_icons_loss": 3.141892433166504, + "eval_icons_loss_ce": 1.8032216758001596e-05, + "eval_icons_loss_iou": 1.07275390625, + "eval_icons_loss_num": 0.197662353515625, + "eval_icons_loss_xval": 3.1328125, + "eval_icons_runtime": 69.1361, + "eval_icons_samples_per_second": 0.723, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "eval_screenspot_CIoU": -0.016074684758981068, + "eval_screenspot_GIoU": 0.01664178321758906, + "eval_screenspot_IoU": 0.16272087146838507, + "eval_screenspot_MAE_all": 0.20578849812348685, + "eval_screenspot_MAE_h": 0.18250097831090292, + "eval_screenspot_MAE_w": 0.19865205387274423, + "eval_screenspot_MAE_x_boxes": 0.22381174067656198, + "eval_screenspot_MAE_y_boxes": 0.1280993570884069, + "eval_screenspot_NUM_probability": 0.9997875094413757, + "eval_screenspot_inside_bbox": 0.3312500019868215, + "eval_screenspot_loss": 3.0338799953460693, + "eval_screenspot_loss_ce": 0.011708071455359459, + "eval_screenspot_loss_iou": 0.994140625, + "eval_screenspot_loss_num": 0.21026611328125, + "eval_screenspot_loss_xval": 3.041015625, + "eval_screenspot_runtime": 116.4531, + "eval_screenspot_samples_per_second": 0.764, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "eval_compot_CIoU": -0.08576418831944466, + "eval_compot_GIoU": -0.027402309700846672, + "eval_compot_IoU": 0.09865068644285202, + "eval_compot_MAE_all": 0.22077596932649612, + "eval_compot_MAE_h": 0.20852911472320557, + "eval_compot_MAE_w": 0.24083954840898514, + "eval_compot_MAE_x_boxes": 0.1733434721827507, + "eval_compot_MAE_y_boxes": 0.10590603947639465, + "eval_compot_NUM_probability": 0.9998809397220612, + "eval_compot_inside_bbox": 0.1927083358168602, + "eval_compot_loss": 3.1436450481414795, + "eval_compot_loss_ce": 0.0049590670969337225, + "eval_compot_loss_iou": 1.023681640625, + "eval_compot_loss_num": 0.22637939453125, + "eval_compot_loss_xval": 3.177734375, + "eval_compot_runtime": 71.2026, + "eval_compot_samples_per_second": 0.702, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "eval_custom_ui_MAE_all": 0.14865753799676895, + "eval_custom_ui_MAE_x": 0.1338840276002884, + "eval_custom_ui_MAE_y": 0.16343104094266891, + "eval_custom_ui_NUM_probability": 0.9999413192272186, + "eval_custom_ui_loss": 0.833726167678833, + "eval_custom_ui_loss_ce": 0.16179928183555603, + "eval_custom_ui_loss_num": 0.140472412109375, + "eval_custom_ui_loss_xval": 0.702880859375, + "eval_custom_ui_runtime": 51.0191, + "eval_custom_ui_samples_per_second": 0.98, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.21060513876538586, + "loss": 0.9273938536643982, + "loss_ce": 0.18276497721672058, + "loss_iou": 0.0, + "loss_num": 0.1484375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 149006232, + "step": 2250 + }, + { + "epoch": 0.2106987410492816, + "grad_norm": 20.41312599182129, + "learning_rate": 5e-05, + "loss": 1.6266, + "num_input_tokens_seen": 149074428, + "step": 2251 + }, + { + "epoch": 0.2106987410492816, + "loss": 1.5528807640075684, + "loss_ce": 0.003564316313713789, + "loss_iou": 0.640625, + "loss_num": 0.053466796875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 149074428, + "step": 2251 + }, + { + "epoch": 0.21079234333317734, + "grad_norm": 21.525434494018555, + "learning_rate": 5e-05, + "loss": 1.3801, + "num_input_tokens_seen": 149141204, + "step": 2252 + }, + { + "epoch": 0.21079234333317734, + "loss": 1.443589687347412, + "loss_ce": 0.004624864086508751, + "loss_iou": 0.60546875, + "loss_num": 0.045166015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 149141204, + "step": 2252 + }, + { + "epoch": 0.21088594561707305, + "grad_norm": 20.107213973999023, + "learning_rate": 5e-05, + "loss": 1.5497, + "num_input_tokens_seen": 149207932, + "step": 2253 + }, + { + "epoch": 0.21088594561707305, + "loss": 1.4908183813095093, + "loss_ce": 0.007908225059509277, + "loss_iou": 0.625, + "loss_num": 0.045654296875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 149207932, + "step": 2253 + }, + { + "epoch": 0.2109795479009688, + "grad_norm": 61.2882194519043, + "learning_rate": 5e-05, + "loss": 1.0448, + "num_input_tokens_seen": 149272128, + "step": 2254 + }, + { + "epoch": 0.2109795479009688, + "loss": 1.0997624397277832, + "loss_ce": 0.002594493795186281, + "loss_iou": 0.453125, + "loss_num": 0.037841796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 149272128, + "step": 2254 + }, + { + "epoch": 0.2110731501848645, + "grad_norm": 21.424177169799805, + "learning_rate": 5e-05, + "loss": 1.4858, + "num_input_tokens_seen": 149338100, + "step": 2255 + }, + { + "epoch": 0.2110731501848645, + "loss": 1.6285836696624756, + "loss_ce": 0.0035836196038872004, + "loss_iou": 0.69921875, + "loss_num": 0.0458984375, + "loss_xval": 1.625, + "num_input_tokens_seen": 149338100, + "step": 2255 + }, + { + "epoch": 0.21116675246876024, + "grad_norm": 19.440874099731445, + "learning_rate": 5e-05, + "loss": 1.4271, + "num_input_tokens_seen": 149405908, + "step": 2256 + }, + { + "epoch": 0.21116675246876024, + "loss": 1.5703045129776, + "loss_ce": 0.0014568158658221364, + "loss_iou": 0.6875, + "loss_num": 0.0380859375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 149405908, + "step": 2256 + }, + { + "epoch": 0.21126035475265598, + "grad_norm": 13.495523452758789, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 149471956, + "step": 2257 + }, + { + "epoch": 0.21126035475265598, + "loss": 1.3002811670303345, + "loss_ce": 0.0019413732225075364, + "loss_iou": 0.546875, + "loss_num": 0.041748046875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 149471956, + "step": 2257 + }, + { + "epoch": 0.2113539570365517, + "grad_norm": 20.041250228881836, + "learning_rate": 5e-05, + "loss": 1.1498, + "num_input_tokens_seen": 149537104, + "step": 2258 + }, + { + "epoch": 0.2113539570365517, + "loss": 1.2770624160766602, + "loss_ce": 0.005089844577014446, + "loss_iou": 0.57421875, + "loss_num": 0.0245361328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 149537104, + "step": 2258 + }, + { + "epoch": 0.21144755932044743, + "grad_norm": 33.957183837890625, + "learning_rate": 5e-05, + "loss": 1.5538, + "num_input_tokens_seen": 149602036, + "step": 2259 + }, + { + "epoch": 0.21144755932044743, + "loss": 1.463841438293457, + "loss_ce": 0.005833700764924288, + "loss_iou": 0.64453125, + "loss_num": 0.032958984375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 149602036, + "step": 2259 + }, + { + "epoch": 0.21154116160434314, + "grad_norm": 189.0176239013672, + "learning_rate": 5e-05, + "loss": 1.7054, + "num_input_tokens_seen": 149668420, + "step": 2260 + }, + { + "epoch": 0.21154116160434314, + "loss": 1.7047916650772095, + "loss_ce": 0.00654939329251647, + "loss_iou": 0.734375, + "loss_num": 0.046630859375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 149668420, + "step": 2260 + }, + { + "epoch": 0.21163476388823887, + "grad_norm": 25.923681259155273, + "learning_rate": 5e-05, + "loss": 1.5028, + "num_input_tokens_seen": 149735604, + "step": 2261 + }, + { + "epoch": 0.21163476388823887, + "loss": 1.4564383029937744, + "loss_ce": 0.006242984440177679, + "loss_iou": 0.59765625, + "loss_num": 0.0517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 149735604, + "step": 2261 + }, + { + "epoch": 0.2117283661721346, + "grad_norm": 30.270620346069336, + "learning_rate": 5e-05, + "loss": 1.2854, + "num_input_tokens_seen": 149801416, + "step": 2262 + }, + { + "epoch": 0.2117283661721346, + "loss": 1.1462156772613525, + "loss_ce": 0.009740954264998436, + "loss_iou": 0.4765625, + "loss_num": 0.036376953125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 149801416, + "step": 2262 + }, + { + "epoch": 0.21182196845603032, + "grad_norm": 22.751750946044922, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 149866496, + "step": 2263 + }, + { + "epoch": 0.21182196845603032, + "loss": 1.426013708114624, + "loss_ce": 0.00609190808609128, + "loss_iou": 0.64453125, + "loss_num": 0.0263671875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 149866496, + "step": 2263 + }, + { + "epoch": 0.21191557073992606, + "grad_norm": 17.920101165771484, + "learning_rate": 5e-05, + "loss": 1.4721, + "num_input_tokens_seen": 149932904, + "step": 2264 + }, + { + "epoch": 0.21191557073992606, + "loss": 1.6346955299377441, + "loss_ce": 0.005300988908857107, + "loss_iou": 0.61328125, + "loss_num": 0.08056640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 149932904, + "step": 2264 + }, + { + "epoch": 0.21200917302382177, + "grad_norm": 17.05817222595215, + "learning_rate": 5e-05, + "loss": 1.3378, + "num_input_tokens_seen": 149999576, + "step": 2265 + }, + { + "epoch": 0.21200917302382177, + "loss": 1.4109833240509033, + "loss_ce": 0.005221586674451828, + "loss_iou": 0.6015625, + "loss_num": 0.041015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 149999576, + "step": 2265 + }, + { + "epoch": 0.2121027753077175, + "grad_norm": 28.452831268310547, + "learning_rate": 5e-05, + "loss": 1.3323, + "num_input_tokens_seen": 150064768, + "step": 2266 + }, + { + "epoch": 0.2121027753077175, + "loss": 1.3750596046447754, + "loss_ce": 0.014219718053936958, + "loss_iou": 0.5390625, + "loss_num": 0.056884765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 150064768, + "step": 2266 + }, + { + "epoch": 0.21219637759161322, + "grad_norm": 18.17842674255371, + "learning_rate": 5e-05, + "loss": 1.4363, + "num_input_tokens_seen": 150129856, + "step": 2267 + }, + { + "epoch": 0.21219637759161322, + "loss": 1.4692213535308838, + "loss_ce": 0.013166775926947594, + "loss_iou": 0.58984375, + "loss_num": 0.0556640625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 150129856, + "step": 2267 + }, + { + "epoch": 0.21228997987550896, + "grad_norm": 14.280591011047363, + "learning_rate": 5e-05, + "loss": 1.3063, + "num_input_tokens_seen": 150196056, + "step": 2268 + }, + { + "epoch": 0.21228997987550896, + "loss": 1.3882437944412231, + "loss_ce": 0.0025016251020133495, + "loss_iou": 0.5703125, + "loss_num": 0.0498046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 150196056, + "step": 2268 + }, + { + "epoch": 0.2123835821594047, + "grad_norm": 34.965476989746094, + "learning_rate": 5e-05, + "loss": 1.4558, + "num_input_tokens_seen": 150262864, + "step": 2269 + }, + { + "epoch": 0.2123835821594047, + "loss": 1.3500034809112549, + "loss_ce": 0.006253512110561132, + "loss_iou": 0.5625, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 150262864, + "step": 2269 + }, + { + "epoch": 0.2124771844433004, + "grad_norm": 37.37397766113281, + "learning_rate": 5e-05, + "loss": 1.4802, + "num_input_tokens_seen": 150329176, + "step": 2270 + }, + { + "epoch": 0.2124771844433004, + "loss": 1.531097412109375, + "loss_ce": 0.005706858821213245, + "loss_iou": 0.60546875, + "loss_num": 0.0625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 150329176, + "step": 2270 + }, + { + "epoch": 0.21257078672719615, + "grad_norm": 32.53642272949219, + "learning_rate": 5e-05, + "loss": 1.785, + "num_input_tokens_seen": 150397212, + "step": 2271 + }, + { + "epoch": 0.21257078672719615, + "loss": 1.8849523067474365, + "loss_ce": 0.007999066263437271, + "loss_iou": 0.7890625, + "loss_num": 0.0595703125, + "loss_xval": 1.875, + "num_input_tokens_seen": 150397212, + "step": 2271 + }, + { + "epoch": 0.21266438901109186, + "grad_norm": 19.93190574645996, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 150463268, + "step": 2272 + }, + { + "epoch": 0.21266438901109186, + "loss": 1.392273187637329, + "loss_ce": 0.005554419942200184, + "loss_iou": 0.59375, + "loss_num": 0.04052734375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 150463268, + "step": 2272 + }, + { + "epoch": 0.2127579912949876, + "grad_norm": 28.814884185791016, + "learning_rate": 5e-05, + "loss": 1.3199, + "num_input_tokens_seen": 150529540, + "step": 2273 + }, + { + "epoch": 0.2127579912949876, + "loss": 1.3307688236236572, + "loss_ce": 0.00410857331007719, + "loss_iou": 0.56640625, + "loss_num": 0.038818359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 150529540, + "step": 2273 + }, + { + "epoch": 0.21285159357888334, + "grad_norm": 40.80498123168945, + "learning_rate": 5e-05, + "loss": 1.5853, + "num_input_tokens_seen": 150595128, + "step": 2274 + }, + { + "epoch": 0.21285159357888334, + "loss": 1.6934144496917725, + "loss_ce": 0.002984791761264205, + "loss_iou": 0.6953125, + "loss_num": 0.060546875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 150595128, + "step": 2274 + }, + { + "epoch": 0.21294519586277905, + "grad_norm": 14.86881160736084, + "learning_rate": 5e-05, + "loss": 1.3172, + "num_input_tokens_seen": 150661340, + "step": 2275 + }, + { + "epoch": 0.21294519586277905, + "loss": 1.3164842128753662, + "loss_ce": 0.010636991821229458, + "loss_iou": 0.51953125, + "loss_num": 0.052734375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 150661340, + "step": 2275 + }, + { + "epoch": 0.21303879814667478, + "grad_norm": 24.285980224609375, + "learning_rate": 5e-05, + "loss": 1.319, + "num_input_tokens_seen": 150727916, + "step": 2276 + }, + { + "epoch": 0.21303879814667478, + "loss": 1.420497179031372, + "loss_ce": 0.0074112494476139545, + "loss_iou": 0.58984375, + "loss_num": 0.046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 150727916, + "step": 2276 + }, + { + "epoch": 0.2131324004305705, + "grad_norm": 24.9462890625, + "learning_rate": 5e-05, + "loss": 1.5156, + "num_input_tokens_seen": 150793392, + "step": 2277 + }, + { + "epoch": 0.2131324004305705, + "loss": 1.4599671363830566, + "loss_ce": 0.004888995084911585, + "loss_iou": 0.58984375, + "loss_num": 0.05419921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 150793392, + "step": 2277 + }, + { + "epoch": 0.21322600271446623, + "grad_norm": 27.210254669189453, + "learning_rate": 5e-05, + "loss": 1.5673, + "num_input_tokens_seen": 150859512, + "step": 2278 + }, + { + "epoch": 0.21322600271446623, + "loss": 1.566602349281311, + "loss_ce": 0.010450020432472229, + "loss_iou": 0.62109375, + "loss_num": 0.0625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 150859512, + "step": 2278 + }, + { + "epoch": 0.21331960499836197, + "grad_norm": 34.29689025878906, + "learning_rate": 5e-05, + "loss": 1.4849, + "num_input_tokens_seen": 150926076, + "step": 2279 + }, + { + "epoch": 0.21331960499836197, + "loss": 1.4312529563903809, + "loss_ce": 0.006448186933994293, + "loss_iou": 0.5859375, + "loss_num": 0.05029296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 150926076, + "step": 2279 + }, + { + "epoch": 0.21341320728225768, + "grad_norm": 25.24986457824707, + "learning_rate": 5e-05, + "loss": 1.7384, + "num_input_tokens_seen": 150992612, + "step": 2280 + }, + { + "epoch": 0.21341320728225768, + "loss": 1.6506407260894775, + "loss_ce": 0.005132874473929405, + "loss_iou": 0.71875, + "loss_num": 0.041015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 150992612, + "step": 2280 + }, + { + "epoch": 0.21350680956615342, + "grad_norm": 19.837234497070312, + "learning_rate": 5e-05, + "loss": 1.3911, + "num_input_tokens_seen": 151059056, + "step": 2281 + }, + { + "epoch": 0.21350680956615342, + "loss": 1.5271108150482178, + "loss_ce": 0.0031849215738475323, + "loss_iou": 0.63671875, + "loss_num": 0.05078125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 151059056, + "step": 2281 + }, + { + "epoch": 0.21360041185004913, + "grad_norm": 98.2978515625, + "learning_rate": 5e-05, + "loss": 1.4678, + "num_input_tokens_seen": 151125564, + "step": 2282 + }, + { + "epoch": 0.21360041185004913, + "loss": 1.698117971420288, + "loss_ce": 0.009641429409384727, + "loss_iou": 0.68359375, + "loss_num": 0.0634765625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 151125564, + "step": 2282 + }, + { + "epoch": 0.21369401413394487, + "grad_norm": 22.10891342163086, + "learning_rate": 5e-05, + "loss": 1.5324, + "num_input_tokens_seen": 151191800, + "step": 2283 + }, + { + "epoch": 0.21369401413394487, + "loss": 1.639382243156433, + "loss_ce": 0.00461659487336874, + "loss_iou": 0.67578125, + "loss_num": 0.056396484375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 151191800, + "step": 2283 + }, + { + "epoch": 0.2137876164178406, + "grad_norm": 17.821348190307617, + "learning_rate": 5e-05, + "loss": 1.4788, + "num_input_tokens_seen": 151257892, + "step": 2284 + }, + { + "epoch": 0.2137876164178406, + "loss": 1.5812492370605469, + "loss_ce": 0.003612576285377145, + "loss_iou": 0.6484375, + "loss_num": 0.056640625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 151257892, + "step": 2284 + }, + { + "epoch": 0.21388121870173632, + "grad_norm": 28.492366790771484, + "learning_rate": 5e-05, + "loss": 1.5016, + "num_input_tokens_seen": 151324140, + "step": 2285 + }, + { + "epoch": 0.21388121870173632, + "loss": 1.4028370380401611, + "loss_ce": 0.009282313287258148, + "loss_iou": 0.58203125, + "loss_num": 0.046630859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 151324140, + "step": 2285 + }, + { + "epoch": 0.21397482098563206, + "grad_norm": 36.702606201171875, + "learning_rate": 5e-05, + "loss": 1.3095, + "num_input_tokens_seen": 151388180, + "step": 2286 + }, + { + "epoch": 0.21397482098563206, + "loss": 1.497154712677002, + "loss_ce": 0.004967216867953539, + "loss_iou": 0.62890625, + "loss_num": 0.047119140625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 151388180, + "step": 2286 + }, + { + "epoch": 0.21406842326952777, + "grad_norm": 26.864652633666992, + "learning_rate": 5e-05, + "loss": 1.4334, + "num_input_tokens_seen": 151455172, + "step": 2287 + }, + { + "epoch": 0.21406842326952777, + "loss": 1.5351767539978027, + "loss_ce": 0.0014852817403152585, + "loss_iou": 0.67578125, + "loss_num": 0.036376953125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 151455172, + "step": 2287 + }, + { + "epoch": 0.2141620255534235, + "grad_norm": 20.276927947998047, + "learning_rate": 5e-05, + "loss": 1.6213, + "num_input_tokens_seen": 151521496, + "step": 2288 + }, + { + "epoch": 0.2141620255534235, + "loss": 1.7019915580749512, + "loss_ce": 0.005702382419258356, + "loss_iou": 0.6953125, + "loss_num": 0.06103515625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 151521496, + "step": 2288 + }, + { + "epoch": 0.21425562783731922, + "grad_norm": 26.8983211517334, + "learning_rate": 5e-05, + "loss": 1.6012, + "num_input_tokens_seen": 151587740, + "step": 2289 + }, + { + "epoch": 0.21425562783731922, + "loss": 1.7984768152236938, + "loss_ce": 0.008437741547822952, + "loss_iou": 0.70703125, + "loss_num": 0.07470703125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 151587740, + "step": 2289 + }, + { + "epoch": 0.21434923012121496, + "grad_norm": 22.092914581298828, + "learning_rate": 5e-05, + "loss": 1.4659, + "num_input_tokens_seen": 151653832, + "step": 2290 + }, + { + "epoch": 0.21434923012121496, + "loss": 1.527360439300537, + "loss_ce": 0.01100299321115017, + "loss_iou": 0.58203125, + "loss_num": 0.0703125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 151653832, + "step": 2290 + }, + { + "epoch": 0.2144428324051107, + "grad_norm": 23.715099334716797, + "learning_rate": 5e-05, + "loss": 1.4252, + "num_input_tokens_seen": 151720092, + "step": 2291 + }, + { + "epoch": 0.2144428324051107, + "loss": 1.4918944835662842, + "loss_ce": 0.004589731805026531, + "loss_iou": 0.6328125, + "loss_num": 0.04541015625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 151720092, + "step": 2291 + }, + { + "epoch": 0.2145364346890064, + "grad_norm": 28.33808135986328, + "learning_rate": 5e-05, + "loss": 1.246, + "num_input_tokens_seen": 151785860, + "step": 2292 + }, + { + "epoch": 0.2145364346890064, + "loss": 1.1220732927322388, + "loss_ce": 0.006350632291287184, + "loss_iou": 0.46484375, + "loss_num": 0.036865234375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 151785860, + "step": 2292 + }, + { + "epoch": 0.21463003697290214, + "grad_norm": 31.031892776489258, + "learning_rate": 5e-05, + "loss": 1.5237, + "num_input_tokens_seen": 151852356, + "step": 2293 + }, + { + "epoch": 0.21463003697290214, + "loss": 1.4461427927017212, + "loss_ce": 0.005713123362511396, + "loss_iou": 0.62109375, + "loss_num": 0.04052734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 151852356, + "step": 2293 + }, + { + "epoch": 0.21472363925679785, + "grad_norm": 18.391902923583984, + "learning_rate": 5e-05, + "loss": 1.1777, + "num_input_tokens_seen": 151919120, + "step": 2294 + }, + { + "epoch": 0.21472363925679785, + "loss": 1.0668582916259766, + "loss_ce": 0.005456896498799324, + "loss_iou": 0.44140625, + "loss_num": 0.035888671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 151919120, + "step": 2294 + }, + { + "epoch": 0.2148172415406936, + "grad_norm": 28.7263126373291, + "learning_rate": 5e-05, + "loss": 1.3777, + "num_input_tokens_seen": 151984628, + "step": 2295 + }, + { + "epoch": 0.2148172415406936, + "loss": 1.324313998222351, + "loss_ce": 0.004489860497415066, + "loss_iou": 0.498046875, + "loss_num": 0.064453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 151984628, + "step": 2295 + }, + { + "epoch": 0.21491084382458933, + "grad_norm": 22.25419044494629, + "learning_rate": 5e-05, + "loss": 1.5439, + "num_input_tokens_seen": 152051292, + "step": 2296 + }, + { + "epoch": 0.21491084382458933, + "loss": 1.7377827167510986, + "loss_ce": 0.0014546426245942712, + "loss_iou": 0.6875, + "loss_num": 0.072265625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 152051292, + "step": 2296 + }, + { + "epoch": 0.21500444610848504, + "grad_norm": 37.100059509277344, + "learning_rate": 5e-05, + "loss": 1.3964, + "num_input_tokens_seen": 152119152, + "step": 2297 + }, + { + "epoch": 0.21500444610848504, + "loss": 1.3398528099060059, + "loss_ce": 0.005868327338248491, + "loss_iou": 0.58203125, + "loss_num": 0.033447265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 152119152, + "step": 2297 + }, + { + "epoch": 0.21509804839238078, + "grad_norm": 20.610252380371094, + "learning_rate": 5e-05, + "loss": 1.5867, + "num_input_tokens_seen": 152185324, + "step": 2298 + }, + { + "epoch": 0.21509804839238078, + "loss": 1.6689636707305908, + "loss_ce": 0.005389504134654999, + "loss_iou": 0.66796875, + "loss_num": 0.06494140625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 152185324, + "step": 2298 + }, + { + "epoch": 0.2151916506762765, + "grad_norm": 33.635032653808594, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 152250808, + "step": 2299 + }, + { + "epoch": 0.2151916506762765, + "loss": 1.3759686946868896, + "loss_ce": 0.004386695101857185, + "loss_iou": 0.5625, + "loss_num": 0.049072265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 152250808, + "step": 2299 + }, + { + "epoch": 0.21528525296017223, + "grad_norm": 22.86467170715332, + "learning_rate": 5e-05, + "loss": 1.2004, + "num_input_tokens_seen": 152316712, + "step": 2300 + }, + { + "epoch": 0.21528525296017223, + "loss": 1.2595347166061401, + "loss_ce": 0.007581586949527264, + "loss_iou": 0.494140625, + "loss_num": 0.052978515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 152316712, + "step": 2300 + }, + { + "epoch": 0.21537885524406797, + "grad_norm": 25.605911254882812, + "learning_rate": 5e-05, + "loss": 1.4399, + "num_input_tokens_seen": 152383300, + "step": 2301 + }, + { + "epoch": 0.21537885524406797, + "loss": 1.4577968120574951, + "loss_ce": 0.00857799407094717, + "loss_iou": 0.57421875, + "loss_num": 0.060546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 152383300, + "step": 2301 + }, + { + "epoch": 0.21547245752796368, + "grad_norm": 48.16738510131836, + "learning_rate": 5e-05, + "loss": 1.4634, + "num_input_tokens_seen": 152451216, + "step": 2302 + }, + { + "epoch": 0.21547245752796368, + "loss": 1.5580638647079468, + "loss_ce": 0.010212285444140434, + "loss_iou": 0.66015625, + "loss_num": 0.045166015625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 152451216, + "step": 2302 + }, + { + "epoch": 0.21556605981185942, + "grad_norm": 22.7972354888916, + "learning_rate": 5e-05, + "loss": 1.6227, + "num_input_tokens_seen": 152517532, + "step": 2303 + }, + { + "epoch": 0.21556605981185942, + "loss": 1.4903476238250732, + "loss_ce": 0.008902355097234249, + "loss_iou": 0.65625, + "loss_num": 0.0341796875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 152517532, + "step": 2303 + }, + { + "epoch": 0.21565966209575513, + "grad_norm": 15.255064964294434, + "learning_rate": 5e-05, + "loss": 1.4808, + "num_input_tokens_seen": 152583972, + "step": 2304 + }, + { + "epoch": 0.21565966209575513, + "loss": 1.5429351329803467, + "loss_ce": 0.003872634842991829, + "loss_iou": 0.60546875, + "loss_num": 0.0654296875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 152583972, + "step": 2304 + }, + { + "epoch": 0.21575326437965087, + "grad_norm": 50.384613037109375, + "learning_rate": 5e-05, + "loss": 1.2977, + "num_input_tokens_seen": 152649672, + "step": 2305 + }, + { + "epoch": 0.21575326437965087, + "loss": 1.387707233428955, + "loss_ce": 0.006847859360277653, + "loss_iou": 0.58984375, + "loss_num": 0.040771484375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 152649672, + "step": 2305 + }, + { + "epoch": 0.21584686666354658, + "grad_norm": 33.084590911865234, + "learning_rate": 5e-05, + "loss": 1.45, + "num_input_tokens_seen": 152716932, + "step": 2306 + }, + { + "epoch": 0.21584686666354658, + "loss": 1.5436749458312988, + "loss_ce": 0.0036359750665724277, + "loss_iou": 0.6484375, + "loss_num": 0.048583984375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 152716932, + "step": 2306 + }, + { + "epoch": 0.21594046894744232, + "grad_norm": 20.642091751098633, + "learning_rate": 5e-05, + "loss": 1.6851, + "num_input_tokens_seen": 152782976, + "step": 2307 + }, + { + "epoch": 0.21594046894744232, + "loss": 1.8109982013702393, + "loss_ce": 0.00435758288949728, + "loss_iou": 0.7421875, + "loss_num": 0.0634765625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 152782976, + "step": 2307 + }, + { + "epoch": 0.21603407123133805, + "grad_norm": 13.964632987976074, + "learning_rate": 5e-05, + "loss": 1.3355, + "num_input_tokens_seen": 152849072, + "step": 2308 + }, + { + "epoch": 0.21603407123133805, + "loss": 1.3385752439498901, + "loss_ce": 0.01093850377947092, + "loss_iou": 0.53515625, + "loss_num": 0.051025390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 152849072, + "step": 2308 + }, + { + "epoch": 0.21612767351523376, + "grad_norm": 24.622068405151367, + "learning_rate": 5e-05, + "loss": 1.3879, + "num_input_tokens_seen": 152915040, + "step": 2309 + }, + { + "epoch": 0.21612767351523376, + "loss": 1.3095051050186157, + "loss_ce": 0.007747296243906021, + "loss_iou": 0.53515625, + "loss_num": 0.04638671875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 152915040, + "step": 2309 + }, + { + "epoch": 0.2162212757991295, + "grad_norm": 23.115848541259766, + "learning_rate": 5e-05, + "loss": 1.5362, + "num_input_tokens_seen": 152981616, + "step": 2310 + }, + { + "epoch": 0.2162212757991295, + "loss": 1.5281729698181152, + "loss_ce": 0.00912993773818016, + "loss_iou": 0.6328125, + "loss_num": 0.05126953125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 152981616, + "step": 2310 + }, + { + "epoch": 0.2163148780830252, + "grad_norm": 104.16942596435547, + "learning_rate": 5e-05, + "loss": 1.4349, + "num_input_tokens_seen": 153048352, + "step": 2311 + }, + { + "epoch": 0.2163148780830252, + "loss": 1.5230183601379395, + "loss_ce": 0.006905078887939453, + "loss_iou": 0.59375, + "loss_num": 0.06591796875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 153048352, + "step": 2311 + }, + { + "epoch": 0.21640848036692095, + "grad_norm": 25.01462173461914, + "learning_rate": 5e-05, + "loss": 1.4899, + "num_input_tokens_seen": 153115308, + "step": 2312 + }, + { + "epoch": 0.21640848036692095, + "loss": 1.5599207878112793, + "loss_ce": 0.006209854036569595, + "loss_iou": 0.6640625, + "loss_num": 0.045654296875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 153115308, + "step": 2312 + }, + { + "epoch": 0.2165020826508167, + "grad_norm": 22.376323699951172, + "learning_rate": 5e-05, + "loss": 1.4721, + "num_input_tokens_seen": 153182640, + "step": 2313 + }, + { + "epoch": 0.2165020826508167, + "loss": 1.455622911453247, + "loss_ce": 0.00933381263166666, + "loss_iou": 0.6171875, + "loss_num": 0.04248046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 153182640, + "step": 2313 + }, + { + "epoch": 0.2165956849347124, + "grad_norm": 30.825626373291016, + "learning_rate": 5e-05, + "loss": 1.4976, + "num_input_tokens_seen": 153249076, + "step": 2314 + }, + { + "epoch": 0.2165956849347124, + "loss": 1.3127450942993164, + "loss_ce": 0.002686548512428999, + "loss_iou": 0.55078125, + "loss_num": 0.042236328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 153249076, + "step": 2314 + }, + { + "epoch": 0.21668928721860814, + "grad_norm": 33.20108413696289, + "learning_rate": 5e-05, + "loss": 1.7264, + "num_input_tokens_seen": 153315072, + "step": 2315 + }, + { + "epoch": 0.21668928721860814, + "loss": 1.7240190505981445, + "loss_ce": 0.007222216576337814, + "loss_iou": 0.734375, + "loss_num": 0.04931640625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 153315072, + "step": 2315 + }, + { + "epoch": 0.21678288950250385, + "grad_norm": 10.900003433227539, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 153382136, + "step": 2316 + }, + { + "epoch": 0.21678288950250385, + "loss": 1.3354460000991821, + "loss_ce": 0.003903105389326811, + "loss_iou": 0.5390625, + "loss_num": 0.050537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 153382136, + "step": 2316 + }, + { + "epoch": 0.2168764917863996, + "grad_norm": 36.16751480102539, + "learning_rate": 5e-05, + "loss": 1.2627, + "num_input_tokens_seen": 153448448, + "step": 2317 + }, + { + "epoch": 0.2168764917863996, + "loss": 1.2977166175842285, + "loss_ce": 0.0032829931005835533, + "loss_iou": 0.56640625, + "loss_num": 0.032470703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 153448448, + "step": 2317 + }, + { + "epoch": 0.21697009407029533, + "grad_norm": 21.0792179107666, + "learning_rate": 5e-05, + "loss": 1.4286, + "num_input_tokens_seen": 153513880, + "step": 2318 + }, + { + "epoch": 0.21697009407029533, + "loss": 1.4616341590881348, + "loss_ce": 0.005579415243119001, + "loss_iou": 0.61328125, + "loss_num": 0.046142578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 153513880, + "step": 2318 + }, + { + "epoch": 0.21706369635419104, + "grad_norm": 25.42380714416504, + "learning_rate": 5e-05, + "loss": 1.3606, + "num_input_tokens_seen": 153579424, + "step": 2319 + }, + { + "epoch": 0.21706369635419104, + "loss": 1.374646544456482, + "loss_ce": 0.004285210743546486, + "loss_iou": 0.5859375, + "loss_num": 0.03955078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 153579424, + "step": 2319 + }, + { + "epoch": 0.21715729863808678, + "grad_norm": 29.15047836303711, + "learning_rate": 5e-05, + "loss": 1.3324, + "num_input_tokens_seen": 153646624, + "step": 2320 + }, + { + "epoch": 0.21715729863808678, + "loss": 1.2218403816223145, + "loss_ce": 0.011391129344701767, + "loss_iou": 0.51171875, + "loss_num": 0.037841796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 153646624, + "step": 2320 + }, + { + "epoch": 0.2172509009219825, + "grad_norm": 27.564165115356445, + "learning_rate": 5e-05, + "loss": 1.5839, + "num_input_tokens_seen": 153713048, + "step": 2321 + }, + { + "epoch": 0.2172509009219825, + "loss": 1.962526798248291, + "loss_ce": 0.005495624616742134, + "loss_iou": 0.7890625, + "loss_num": 0.07568359375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 153713048, + "step": 2321 + }, + { + "epoch": 0.21734450320587823, + "grad_norm": 22.206457138061523, + "learning_rate": 5e-05, + "loss": 1.5211, + "num_input_tokens_seen": 153778652, + "step": 2322 + }, + { + "epoch": 0.21734450320587823, + "loss": 1.782984972000122, + "loss_ce": 0.008571009151637554, + "loss_iou": 0.765625, + "loss_num": 0.04833984375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 153778652, + "step": 2322 + }, + { + "epoch": 0.21743810548977396, + "grad_norm": 27.375165939331055, + "learning_rate": 5e-05, + "loss": 1.1316, + "num_input_tokens_seen": 153844580, + "step": 2323 + }, + { + "epoch": 0.21743810548977396, + "loss": 1.2421653270721436, + "loss_ce": 0.00486073549836874, + "loss_iou": 0.5625, + "loss_num": 0.0228271484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 153844580, + "step": 2323 + }, + { + "epoch": 0.21753170777366967, + "grad_norm": 21.734966278076172, + "learning_rate": 5e-05, + "loss": 1.7028, + "num_input_tokens_seen": 153910768, + "step": 2324 + }, + { + "epoch": 0.21753170777366967, + "loss": 1.7416293621063232, + "loss_ce": 0.007254478521645069, + "loss_iou": 0.734375, + "loss_num": 0.05224609375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 153910768, + "step": 2324 + }, + { + "epoch": 0.2176253100575654, + "grad_norm": 17.44476890563965, + "learning_rate": 5e-05, + "loss": 1.1036, + "num_input_tokens_seen": 153977908, + "step": 2325 + }, + { + "epoch": 0.2176253100575654, + "loss": 1.0553233623504639, + "loss_ce": 0.00405379943549633, + "loss_iou": 0.466796875, + "loss_num": 0.0235595703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 153977908, + "step": 2325 + }, + { + "epoch": 0.21771891234146112, + "grad_norm": 20.79197883605957, + "learning_rate": 5e-05, + "loss": 1.4434, + "num_input_tokens_seen": 154044332, + "step": 2326 + }, + { + "epoch": 0.21771891234146112, + "loss": 1.4662706851959229, + "loss_ce": 0.010215958580374718, + "loss_iou": 0.58203125, + "loss_num": 0.05859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 154044332, + "step": 2326 + }, + { + "epoch": 0.21781251462535686, + "grad_norm": 14.832798957824707, + "learning_rate": 5e-05, + "loss": 1.3066, + "num_input_tokens_seen": 154111084, + "step": 2327 + }, + { + "epoch": 0.21781251462535686, + "loss": 1.3189163208007812, + "loss_ce": 0.003974883817136288, + "loss_iou": 0.51953125, + "loss_num": 0.05517578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 154111084, + "step": 2327 + }, + { + "epoch": 0.21790611690925257, + "grad_norm": 18.780778884887695, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 154177292, + "step": 2328 + }, + { + "epoch": 0.21790611690925257, + "loss": 1.3648505210876465, + "loss_ce": 0.004499020054936409, + "loss_iou": 0.58984375, + "loss_num": 0.03662109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 154177292, + "step": 2328 + }, + { + "epoch": 0.2179997191931483, + "grad_norm": 19.013347625732422, + "learning_rate": 5e-05, + "loss": 1.3023, + "num_input_tokens_seen": 154243572, + "step": 2329 + }, + { + "epoch": 0.2179997191931483, + "loss": 1.3702499866485596, + "loss_ce": 0.005015599075704813, + "loss_iou": 0.48828125, + "loss_num": 0.0771484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 154243572, + "step": 2329 + }, + { + "epoch": 0.21809332147704405, + "grad_norm": 12.443313598632812, + "learning_rate": 5e-05, + "loss": 1.2873, + "num_input_tokens_seen": 154309600, + "step": 2330 + }, + { + "epoch": 0.21809332147704405, + "loss": 1.2727537155151367, + "loss_ce": 0.004931387957185507, + "loss_iou": 0.54296875, + "loss_num": 0.035888671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 154309600, + "step": 2330 + }, + { + "epoch": 0.21818692376093976, + "grad_norm": 15.309600830078125, + "learning_rate": 5e-05, + "loss": 1.2904, + "num_input_tokens_seen": 154376540, + "step": 2331 + }, + { + "epoch": 0.21818692376093976, + "loss": 1.1204233169555664, + "loss_ce": 0.0027475638780742884, + "loss_iou": 0.486328125, + "loss_num": 0.02880859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 154376540, + "step": 2331 + }, + { + "epoch": 0.2182805260448355, + "grad_norm": 26.37431526184082, + "learning_rate": 5e-05, + "loss": 1.27, + "num_input_tokens_seen": 154442212, + "step": 2332 + }, + { + "epoch": 0.2182805260448355, + "loss": 1.0199689865112305, + "loss_ce": 0.006541187409311533, + "loss_iou": 0.37890625, + "loss_num": 0.051025390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 154442212, + "step": 2332 + }, + { + "epoch": 0.2183741283287312, + "grad_norm": 17.268339157104492, + "learning_rate": 5e-05, + "loss": 1.3789, + "num_input_tokens_seen": 154508468, + "step": 2333 + }, + { + "epoch": 0.2183741283287312, + "loss": 1.6162927150726318, + "loss_ce": 0.004964606836438179, + "loss_iou": 0.65625, + "loss_num": 0.060302734375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 154508468, + "step": 2333 + }, + { + "epoch": 0.21846773061262695, + "grad_norm": 24.678123474121094, + "learning_rate": 5e-05, + "loss": 1.4215, + "num_input_tokens_seen": 154576148, + "step": 2334 + }, + { + "epoch": 0.21846773061262695, + "loss": 1.5552942752838135, + "loss_ce": 0.00548955611884594, + "loss_iou": 0.6484375, + "loss_num": 0.05029296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 154576148, + "step": 2334 + }, + { + "epoch": 0.2185613328965227, + "grad_norm": 20.073673248291016, + "learning_rate": 5e-05, + "loss": 1.5777, + "num_input_tokens_seen": 154642524, + "step": 2335 + }, + { + "epoch": 0.2185613328965227, + "loss": 1.4285829067230225, + "loss_ce": 0.007196234539151192, + "loss_iou": 0.57421875, + "loss_num": 0.05517578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 154642524, + "step": 2335 + }, + { + "epoch": 0.2186549351804184, + "grad_norm": 29.751808166503906, + "learning_rate": 5e-05, + "loss": 1.2831, + "num_input_tokens_seen": 154709156, + "step": 2336 + }, + { + "epoch": 0.2186549351804184, + "loss": 1.2040770053863525, + "loss_ce": 0.006323082372546196, + "loss_iou": 0.498046875, + "loss_num": 0.0400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 154709156, + "step": 2336 + }, + { + "epoch": 0.21874853746431414, + "grad_norm": 24.08979606628418, + "learning_rate": 5e-05, + "loss": 1.6359, + "num_input_tokens_seen": 154775196, + "step": 2337 + }, + { + "epoch": 0.21874853746431414, + "loss": 1.6917073726654053, + "loss_ce": 0.006160551682114601, + "loss_iou": 0.7109375, + "loss_num": 0.052978515625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 154775196, + "step": 2337 + }, + { + "epoch": 0.21884213974820985, + "grad_norm": 26.720741271972656, + "learning_rate": 5e-05, + "loss": 1.2053, + "num_input_tokens_seen": 154841964, + "step": 2338 + }, + { + "epoch": 0.21884213974820985, + "loss": 1.1504793167114258, + "loss_ce": 0.0064363982528448105, + "loss_iou": 0.466796875, + "loss_num": 0.042236328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 154841964, + "step": 2338 + }, + { + "epoch": 0.21893574203210558, + "grad_norm": 18.2683162689209, + "learning_rate": 5e-05, + "loss": 1.2713, + "num_input_tokens_seen": 154907748, + "step": 2339 + }, + { + "epoch": 0.21893574203210558, + "loss": 1.1781668663024902, + "loss_ce": 0.00238564470782876, + "loss_iou": 0.484375, + "loss_num": 0.04150390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 154907748, + "step": 2339 + }, + { + "epoch": 0.21902934431600132, + "grad_norm": 26.95297622680664, + "learning_rate": 5e-05, + "loss": 1.4753, + "num_input_tokens_seen": 154974288, + "step": 2340 + }, + { + "epoch": 0.21902934431600132, + "loss": 1.4889464378356934, + "loss_ce": 0.007012868300080299, + "loss_iou": 0.62109375, + "loss_num": 0.04833984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 154974288, + "step": 2340 + }, + { + "epoch": 0.21912294659989703, + "grad_norm": 33.99496078491211, + "learning_rate": 5e-05, + "loss": 1.2593, + "num_input_tokens_seen": 155039436, + "step": 2341 + }, + { + "epoch": 0.21912294659989703, + "loss": 1.2838804721832275, + "loss_ce": 0.0055601941421628, + "loss_iou": 0.54296875, + "loss_num": 0.03857421875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 155039436, + "step": 2341 + }, + { + "epoch": 0.21921654888379277, + "grad_norm": 22.024890899658203, + "learning_rate": 5e-05, + "loss": 1.5783, + "num_input_tokens_seen": 155105084, + "step": 2342 + }, + { + "epoch": 0.21921654888379277, + "loss": 1.4463050365447998, + "loss_ce": 0.0070350803434848785, + "loss_iou": 0.60546875, + "loss_num": 0.04541015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 155105084, + "step": 2342 + }, + { + "epoch": 0.21931015116768848, + "grad_norm": 43.48978042602539, + "learning_rate": 5e-05, + "loss": 1.3735, + "num_input_tokens_seen": 155171816, + "step": 2343 + }, + { + "epoch": 0.21931015116768848, + "loss": 1.605210781097412, + "loss_ce": 0.0046248482540249825, + "loss_iou": 0.6640625, + "loss_num": 0.05517578125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 155171816, + "step": 2343 + }, + { + "epoch": 0.21940375345158422, + "grad_norm": 12.529638290405273, + "learning_rate": 5e-05, + "loss": 1.3214, + "num_input_tokens_seen": 155238608, + "step": 2344 + }, + { + "epoch": 0.21940375345158422, + "loss": 1.2851300239562988, + "loss_ce": 0.007786213420331478, + "loss_iou": 0.54296875, + "loss_num": 0.038818359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 155238608, + "step": 2344 + }, + { + "epoch": 0.21949735573547993, + "grad_norm": 19.131290435791016, + "learning_rate": 5e-05, + "loss": 1.4721, + "num_input_tokens_seen": 155304780, + "step": 2345 + }, + { + "epoch": 0.21949735573547993, + "loss": 1.5659270286560059, + "loss_ce": 0.005868379957973957, + "loss_iou": 0.6171875, + "loss_num": 0.064453125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 155304780, + "step": 2345 + }, + { + "epoch": 0.21959095801937567, + "grad_norm": 31.61728858947754, + "learning_rate": 5e-05, + "loss": 1.2846, + "num_input_tokens_seen": 155370144, + "step": 2346 + }, + { + "epoch": 0.21959095801937567, + "loss": 1.262368083000183, + "loss_ce": 0.004555597901344299, + "loss_iou": 0.5, + "loss_num": 0.052001953125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 155370144, + "step": 2346 + }, + { + "epoch": 0.2196845603032714, + "grad_norm": 21.765249252319336, + "learning_rate": 5e-05, + "loss": 1.6413, + "num_input_tokens_seen": 155435816, + "step": 2347 + }, + { + "epoch": 0.2196845603032714, + "loss": 1.5300925970077515, + "loss_ce": 0.008608181029558182, + "loss_iou": 0.66796875, + "loss_num": 0.03759765625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 155435816, + "step": 2347 + }, + { + "epoch": 0.21977816258716712, + "grad_norm": 14.472197532653809, + "learning_rate": 5e-05, + "loss": 1.2535, + "num_input_tokens_seen": 155502040, + "step": 2348 + }, + { + "epoch": 0.21977816258716712, + "loss": 1.121678352355957, + "loss_ce": 0.0027817978989332914, + "loss_iou": 0.470703125, + "loss_num": 0.03515625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 155502040, + "step": 2348 + }, + { + "epoch": 0.21987176487106286, + "grad_norm": 22.763681411743164, + "learning_rate": 5e-05, + "loss": 1.1664, + "num_input_tokens_seen": 155568120, + "step": 2349 + }, + { + "epoch": 0.21987176487106286, + "loss": 1.309107780456543, + "loss_ce": 0.00734997633844614, + "loss_iou": 0.51953125, + "loss_num": 0.05224609375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 155568120, + "step": 2349 + }, + { + "epoch": 0.21996536715495857, + "grad_norm": 19.524024963378906, + "learning_rate": 5e-05, + "loss": 1.3087, + "num_input_tokens_seen": 155635224, + "step": 2350 + }, + { + "epoch": 0.21996536715495857, + "loss": 1.471051812171936, + "loss_ce": 0.009137764573097229, + "loss_iou": 0.59375, + "loss_num": 0.05419921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 155635224, + "step": 2350 + }, + { + "epoch": 0.2200589694388543, + "grad_norm": 34.68376159667969, + "learning_rate": 5e-05, + "loss": 1.3156, + "num_input_tokens_seen": 155700372, + "step": 2351 + }, + { + "epoch": 0.2200589694388543, + "loss": 1.2919411659240723, + "loss_ce": 0.009470431134104729, + "loss_iou": 0.515625, + "loss_num": 0.04931640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 155700372, + "step": 2351 + }, + { + "epoch": 0.22015257172275005, + "grad_norm": 84.88421630859375, + "learning_rate": 5e-05, + "loss": 1.6112, + "num_input_tokens_seen": 155766948, + "step": 2352 + }, + { + "epoch": 0.22015257172275005, + "loss": 1.6565346717834473, + "loss_ce": 0.004190912935882807, + "loss_iou": 0.7109375, + "loss_num": 0.04638671875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 155766948, + "step": 2352 + }, + { + "epoch": 0.22024617400664576, + "grad_norm": 10.805121421813965, + "learning_rate": 5e-05, + "loss": 1.4896, + "num_input_tokens_seen": 155832876, + "step": 2353 + }, + { + "epoch": 0.22024617400664576, + "loss": 1.6248092651367188, + "loss_ce": 0.00469213305041194, + "loss_iou": 0.69921875, + "loss_num": 0.044677734375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 155832876, + "step": 2353 + }, + { + "epoch": 0.2203397762905415, + "grad_norm": 17.746360778808594, + "learning_rate": 5e-05, + "loss": 1.2904, + "num_input_tokens_seen": 155899048, + "step": 2354 + }, + { + "epoch": 0.2203397762905415, + "loss": 1.238121509552002, + "loss_ce": 0.0032582785934209824, + "loss_iou": 0.52734375, + "loss_num": 0.035888671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 155899048, + "step": 2354 + }, + { + "epoch": 0.2204333785744372, + "grad_norm": 25.77299690246582, + "learning_rate": 5e-05, + "loss": 1.4596, + "num_input_tokens_seen": 155964556, + "step": 2355 + }, + { + "epoch": 0.2204333785744372, + "loss": 1.2023621797561646, + "loss_ce": 0.006317278370261192, + "loss_iou": 0.4921875, + "loss_num": 0.042236328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 155964556, + "step": 2355 + }, + { + "epoch": 0.22052698085833294, + "grad_norm": 43.80807113647461, + "learning_rate": 5e-05, + "loss": 1.5496, + "num_input_tokens_seen": 156031108, + "step": 2356 + }, + { + "epoch": 0.22052698085833294, + "loss": 1.5094894170761108, + "loss_ce": 0.005094944499433041, + "loss_iou": 0.66015625, + "loss_num": 0.03759765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 156031108, + "step": 2356 + }, + { + "epoch": 0.22062058314222868, + "grad_norm": 17.146434783935547, + "learning_rate": 5e-05, + "loss": 1.6264, + "num_input_tokens_seen": 156097468, + "step": 2357 + }, + { + "epoch": 0.22062058314222868, + "loss": 1.7161318063735962, + "loss_ce": 0.0022646307479590178, + "loss_iou": 0.75390625, + "loss_num": 0.041015625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 156097468, + "step": 2357 + }, + { + "epoch": 0.2207141854261244, + "grad_norm": 29.371784210205078, + "learning_rate": 5e-05, + "loss": 1.5495, + "num_input_tokens_seen": 156163380, + "step": 2358 + }, + { + "epoch": 0.2207141854261244, + "loss": 1.7303476333618164, + "loss_ce": 0.006714966148138046, + "loss_iou": 0.734375, + "loss_num": 0.051025390625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 156163380, + "step": 2358 + }, + { + "epoch": 0.22080778771002013, + "grad_norm": 48.36513900756836, + "learning_rate": 5e-05, + "loss": 1.4079, + "num_input_tokens_seen": 156229304, + "step": 2359 + }, + { + "epoch": 0.22080778771002013, + "loss": 1.62276291847229, + "loss_ce": 0.005575337912887335, + "loss_iou": 0.67578125, + "loss_num": 0.053955078125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 156229304, + "step": 2359 + }, + { + "epoch": 0.22090138999391584, + "grad_norm": 26.75238037109375, + "learning_rate": 5e-05, + "loss": 1.5, + "num_input_tokens_seen": 156296604, + "step": 2360 + }, + { + "epoch": 0.22090138999391584, + "loss": 1.39876389503479, + "loss_ce": 0.007162292487919331, + "loss_iou": 0.6171875, + "loss_num": 0.0322265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 156296604, + "step": 2360 + }, + { + "epoch": 0.22099499227781158, + "grad_norm": 40.60019302368164, + "learning_rate": 5e-05, + "loss": 1.2413, + "num_input_tokens_seen": 156363332, + "step": 2361 + }, + { + "epoch": 0.22099499227781158, + "loss": 1.3198292255401611, + "loss_ce": 0.003911266103386879, + "loss_iou": 0.55859375, + "loss_num": 0.0400390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 156363332, + "step": 2361 + }, + { + "epoch": 0.22108859456170732, + "grad_norm": 23.686565399169922, + "learning_rate": 5e-05, + "loss": 1.3274, + "num_input_tokens_seen": 156429808, + "step": 2362 + }, + { + "epoch": 0.22108859456170732, + "loss": 1.3950692415237427, + "loss_ce": 0.012256758287549019, + "loss_iou": 0.58203125, + "loss_num": 0.043212890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 156429808, + "step": 2362 + }, + { + "epoch": 0.22118219684560303, + "grad_norm": 47.21058654785156, + "learning_rate": 5e-05, + "loss": 1.559, + "num_input_tokens_seen": 156495828, + "step": 2363 + }, + { + "epoch": 0.22118219684560303, + "loss": 1.6289995908737183, + "loss_ce": 0.005952713079750538, + "loss_iou": 0.6484375, + "loss_num": 0.064453125, + "loss_xval": 1.625, + "num_input_tokens_seen": 156495828, + "step": 2363 + }, + { + "epoch": 0.22127579912949877, + "grad_norm": 23.084014892578125, + "learning_rate": 5e-05, + "loss": 1.5317, + "num_input_tokens_seen": 156562224, + "step": 2364 + }, + { + "epoch": 0.22127579912949877, + "loss": 1.513930082321167, + "loss_ce": 0.004164400044828653, + "loss_iou": 0.6015625, + "loss_num": 0.06103515625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 156562224, + "step": 2364 + }, + { + "epoch": 0.22136940141339448, + "grad_norm": 13.007682800292969, + "learning_rate": 5e-05, + "loss": 1.4322, + "num_input_tokens_seen": 156628364, + "step": 2365 + }, + { + "epoch": 0.22136940141339448, + "loss": 1.3901398181915283, + "loss_ce": 0.008303900249302387, + "loss_iou": 0.5625, + "loss_num": 0.052001953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 156628364, + "step": 2365 + }, + { + "epoch": 0.22146300369729022, + "grad_norm": 15.463536262512207, + "learning_rate": 5e-05, + "loss": 1.3102, + "num_input_tokens_seen": 156694452, + "step": 2366 + }, + { + "epoch": 0.22146300369729022, + "loss": 1.3520538806915283, + "loss_ce": 0.0034210835583508015, + "loss_iou": 0.54296875, + "loss_num": 0.052490234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 156694452, + "step": 2366 + }, + { + "epoch": 0.22155660598118593, + "grad_norm": 32.025978088378906, + "learning_rate": 5e-05, + "loss": 1.4148, + "num_input_tokens_seen": 156761440, + "step": 2367 + }, + { + "epoch": 0.22155660598118593, + "loss": 1.164376974105835, + "loss_ce": 0.002939025405794382, + "loss_iou": 0.5234375, + "loss_num": 0.0223388671875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 156761440, + "step": 2367 + }, + { + "epoch": 0.22165020826508167, + "grad_norm": 21.3950138092041, + "learning_rate": 5e-05, + "loss": 1.5863, + "num_input_tokens_seen": 156827060, + "step": 2368 + }, + { + "epoch": 0.22165020826508167, + "loss": 1.6351876258850098, + "loss_ce": 0.0062814257107675076, + "loss_iou": 0.640625, + "loss_num": 0.06982421875, + "loss_xval": 1.625, + "num_input_tokens_seen": 156827060, + "step": 2368 + }, + { + "epoch": 0.2217438105489774, + "grad_norm": 20.068511962890625, + "learning_rate": 5e-05, + "loss": 1.4375, + "num_input_tokens_seen": 156891820, + "step": 2369 + }, + { + "epoch": 0.2217438105489774, + "loss": 1.3463859558105469, + "loss_ce": 0.005077444948256016, + "loss_iou": 0.5390625, + "loss_num": 0.052734375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 156891820, + "step": 2369 + }, + { + "epoch": 0.22183741283287312, + "grad_norm": 19.693435668945312, + "learning_rate": 5e-05, + "loss": 1.5323, + "num_input_tokens_seen": 156958584, + "step": 2370 + }, + { + "epoch": 0.22183741283287312, + "loss": 1.6072475910186768, + "loss_ce": 0.008126460015773773, + "loss_iou": 0.6484375, + "loss_num": 0.061279296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 156958584, + "step": 2370 + }, + { + "epoch": 0.22193101511676885, + "grad_norm": 30.499183654785156, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 157023540, + "step": 2371 + }, + { + "epoch": 0.22193101511676885, + "loss": 1.431348204612732, + "loss_ce": 0.007764289155602455, + "loss_iou": 0.5625, + "loss_num": 0.059814453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 157023540, + "step": 2371 + }, + { + "epoch": 0.22202461740066456, + "grad_norm": 24.115846633911133, + "learning_rate": 5e-05, + "loss": 1.6977, + "num_input_tokens_seen": 157090452, + "step": 2372 + }, + { + "epoch": 0.22202461740066456, + "loss": 1.7665398120880127, + "loss_ce": 0.003844510531052947, + "loss_iou": 0.72265625, + "loss_num": 0.06396484375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 157090452, + "step": 2372 + }, + { + "epoch": 0.2221182196845603, + "grad_norm": 68.1358642578125, + "learning_rate": 5e-05, + "loss": 1.3712, + "num_input_tokens_seen": 157156644, + "step": 2373 + }, + { + "epoch": 0.2221182196845603, + "loss": 1.4100452661514282, + "loss_ce": 0.002330407965928316, + "loss_iou": 0.58984375, + "loss_num": 0.04541015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 157156644, + "step": 2373 + }, + { + "epoch": 0.22221182196845604, + "grad_norm": 19.001420974731445, + "learning_rate": 5e-05, + "loss": 1.7268, + "num_input_tokens_seen": 157223328, + "step": 2374 + }, + { + "epoch": 0.22221182196845604, + "loss": 1.8332513570785522, + "loss_ce": 0.00463805440813303, + "loss_iou": 0.70703125, + "loss_num": 0.0830078125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 157223328, + "step": 2374 + }, + { + "epoch": 0.22230542425235175, + "grad_norm": 84.92061614990234, + "learning_rate": 5e-05, + "loss": 1.2984, + "num_input_tokens_seen": 157290796, + "step": 2375 + }, + { + "epoch": 0.22230542425235175, + "loss": 1.2620265483856201, + "loss_ce": 0.0022609643638134003, + "loss_iou": 0.5390625, + "loss_num": 0.036376953125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 157290796, + "step": 2375 + }, + { + "epoch": 0.2223990265362475, + "grad_norm": 19.052217483520508, + "learning_rate": 5e-05, + "loss": 1.2273, + "num_input_tokens_seen": 157357600, + "step": 2376 + }, + { + "epoch": 0.2223990265362475, + "loss": 1.0218197107315063, + "loss_ce": 0.00521816685795784, + "loss_iou": 0.421875, + "loss_num": 0.03466796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 157357600, + "step": 2376 + }, + { + "epoch": 0.2224926288201432, + "grad_norm": 31.591190338134766, + "learning_rate": 5e-05, + "loss": 1.5013, + "num_input_tokens_seen": 157423792, + "step": 2377 + }, + { + "epoch": 0.2224926288201432, + "loss": 1.574617862701416, + "loss_ce": 0.004793614149093628, + "loss_iou": 0.62109375, + "loss_num": 0.06591796875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 157423792, + "step": 2377 + }, + { + "epoch": 0.22258623110403894, + "grad_norm": 30.82547950744629, + "learning_rate": 5e-05, + "loss": 1.2971, + "num_input_tokens_seen": 157489616, + "step": 2378 + }, + { + "epoch": 0.22258623110403894, + "loss": 1.2583870887756348, + "loss_ce": 0.003260166384279728, + "loss_iou": 0.51953125, + "loss_num": 0.04345703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 157489616, + "step": 2378 + }, + { + "epoch": 0.22267983338793468, + "grad_norm": 20.78563690185547, + "learning_rate": 5e-05, + "loss": 1.6914, + "num_input_tokens_seen": 157556560, + "step": 2379 + }, + { + "epoch": 0.22267983338793468, + "loss": 1.684895396232605, + "loss_ce": 0.0071610379964113235, + "loss_iou": 0.72265625, + "loss_num": 0.046630859375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 157556560, + "step": 2379 + }, + { + "epoch": 0.2227734356718304, + "grad_norm": 15.464996337890625, + "learning_rate": 5e-05, + "loss": 1.0427, + "num_input_tokens_seen": 157621252, + "step": 2380 + }, + { + "epoch": 0.2227734356718304, + "loss": 0.9868307113647461, + "loss_ce": 0.009291645139455795, + "loss_iou": 0.376953125, + "loss_num": 0.044677734375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 157621252, + "step": 2380 + }, + { + "epoch": 0.22286703795572613, + "grad_norm": 46.25885009765625, + "learning_rate": 5e-05, + "loss": 1.4793, + "num_input_tokens_seen": 157686704, + "step": 2381 + }, + { + "epoch": 0.22286703795572613, + "loss": 1.4747636318206787, + "loss_ce": 0.0040604411624372005, + "loss_iou": 0.6171875, + "loss_num": 0.046875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 157686704, + "step": 2381 + }, + { + "epoch": 0.22296064023962184, + "grad_norm": 24.369220733642578, + "learning_rate": 5e-05, + "loss": 1.5605, + "num_input_tokens_seen": 157753524, + "step": 2382 + }, + { + "epoch": 0.22296064023962184, + "loss": 1.5344555377960205, + "loss_ce": 0.005158551037311554, + "loss_iou": 0.6796875, + "loss_num": 0.03369140625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 157753524, + "step": 2382 + }, + { + "epoch": 0.22305424252351758, + "grad_norm": 24.736207962036133, + "learning_rate": 5e-05, + "loss": 1.4471, + "num_input_tokens_seen": 157819664, + "step": 2383 + }, + { + "epoch": 0.22305424252351758, + "loss": 1.467447280883789, + "loss_ce": 0.0069979652762413025, + "loss_iou": 0.6171875, + "loss_num": 0.044921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 157819664, + "step": 2383 + }, + { + "epoch": 0.22314784480741331, + "grad_norm": 22.33594512939453, + "learning_rate": 5e-05, + "loss": 1.4575, + "num_input_tokens_seen": 157885804, + "step": 2384 + }, + { + "epoch": 0.22314784480741331, + "loss": 1.5102100372314453, + "loss_ce": 0.006303833797574043, + "loss_iou": 0.609375, + "loss_num": 0.057373046875, + "loss_xval": 1.5, + "num_input_tokens_seen": 157885804, + "step": 2384 + }, + { + "epoch": 0.22324144709130903, + "grad_norm": 32.80448913574219, + "learning_rate": 5e-05, + "loss": 1.416, + "num_input_tokens_seen": 157951864, + "step": 2385 + }, + { + "epoch": 0.22324144709130903, + "loss": 1.3747851848602295, + "loss_ce": 0.0041798185557127, + "loss_iou": 0.6015625, + "loss_num": 0.033935546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 157951864, + "step": 2385 + }, + { + "epoch": 0.22333504937520476, + "grad_norm": 19.613697052001953, + "learning_rate": 5e-05, + "loss": 1.6081, + "num_input_tokens_seen": 158019132, + "step": 2386 + }, + { + "epoch": 0.22333504937520476, + "loss": 1.527367115020752, + "loss_ce": 0.006370993331074715, + "loss_iou": 0.58203125, + "loss_num": 0.0712890625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 158019132, + "step": 2386 + }, + { + "epoch": 0.22342865165910047, + "grad_norm": 17.705842971801758, + "learning_rate": 5e-05, + "loss": 1.5608, + "num_input_tokens_seen": 158085192, + "step": 2387 + }, + { + "epoch": 0.22342865165910047, + "loss": 1.56697678565979, + "loss_ce": 0.004476805683225393, + "loss_iou": 0.6015625, + "loss_num": 0.07275390625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 158085192, + "step": 2387 + }, + { + "epoch": 0.2235222539429962, + "grad_norm": 22.97504234313965, + "learning_rate": 5e-05, + "loss": 1.2713, + "num_input_tokens_seen": 158152236, + "step": 2388 + }, + { + "epoch": 0.2235222539429962, + "loss": 1.4159389734268188, + "loss_ce": 0.0028530380222946405, + "loss_iou": 0.5625, + "loss_num": 0.0576171875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 158152236, + "step": 2388 + }, + { + "epoch": 0.22361585622689192, + "grad_norm": 39.97024154663086, + "learning_rate": 5e-05, + "loss": 1.3626, + "num_input_tokens_seen": 158219804, + "step": 2389 + }, + { + "epoch": 0.22361585622689192, + "loss": 1.429793357849121, + "loss_ce": 0.0064534759148955345, + "loss_iou": 0.59765625, + "loss_num": 0.046630859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 158219804, + "step": 2389 + }, + { + "epoch": 0.22370945851078766, + "grad_norm": 55.85667419433594, + "learning_rate": 5e-05, + "loss": 1.4422, + "num_input_tokens_seen": 158285144, + "step": 2390 + }, + { + "epoch": 0.22370945851078766, + "loss": 1.5298120975494385, + "loss_ce": 0.00295661692507565, + "loss_iou": 0.70703125, + "loss_num": 0.022216796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 158285144, + "step": 2390 + }, + { + "epoch": 0.2238030607946834, + "grad_norm": 18.258556365966797, + "learning_rate": 5e-05, + "loss": 1.3819, + "num_input_tokens_seen": 158351220, + "step": 2391 + }, + { + "epoch": 0.2238030607946834, + "loss": 1.1339879035949707, + "loss_ce": 0.003128534648567438, + "loss_iou": 0.490234375, + "loss_num": 0.0299072265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 158351220, + "step": 2391 + }, + { + "epoch": 0.2238966630785791, + "grad_norm": 32.55884552001953, + "learning_rate": 5e-05, + "loss": 1.4388, + "num_input_tokens_seen": 158418340, + "step": 2392 + }, + { + "epoch": 0.2238966630785791, + "loss": 1.4010716676712036, + "loss_ce": 0.005563849583268166, + "loss_iou": 0.5859375, + "loss_num": 0.04443359375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 158418340, + "step": 2392 + }, + { + "epoch": 0.22399026536247485, + "grad_norm": 20.28363609313965, + "learning_rate": 5e-05, + "loss": 1.734, + "num_input_tokens_seen": 158484052, + "step": 2393 + }, + { + "epoch": 0.22399026536247485, + "loss": 1.9104260206222534, + "loss_ce": 0.014918213710188866, + "loss_iou": 0.765625, + "loss_num": 0.07275390625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 158484052, + "step": 2393 + }, + { + "epoch": 0.22408386764637056, + "grad_norm": 38.84884262084961, + "learning_rate": 5e-05, + "loss": 1.6276, + "num_input_tokens_seen": 158549924, + "step": 2394 + }, + { + "epoch": 0.22408386764637056, + "loss": 1.6365025043487549, + "loss_ce": 0.002713385969400406, + "loss_iou": 0.6875, + "loss_num": 0.0517578125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 158549924, + "step": 2394 + }, + { + "epoch": 0.2241774699302663, + "grad_norm": 38.612953186035156, + "learning_rate": 5e-05, + "loss": 1.6393, + "num_input_tokens_seen": 158616112, + "step": 2395 + }, + { + "epoch": 0.2241774699302663, + "loss": 1.5509424209594727, + "loss_ce": 0.0021142764016985893, + "loss_iou": 0.68359375, + "loss_num": 0.03515625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 158616112, + "step": 2395 + }, + { + "epoch": 0.22427107221416204, + "grad_norm": 20.79043197631836, + "learning_rate": 5e-05, + "loss": 1.6254, + "num_input_tokens_seen": 158681624, + "step": 2396 + }, + { + "epoch": 0.22427107221416204, + "loss": 1.4152926206588745, + "loss_ce": 0.010507477447390556, + "loss_iou": 0.6171875, + "loss_num": 0.0341796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 158681624, + "step": 2396 + }, + { + "epoch": 0.22436467449805775, + "grad_norm": 18.30155372619629, + "learning_rate": 5e-05, + "loss": 1.379, + "num_input_tokens_seen": 158747932, + "step": 2397 + }, + { + "epoch": 0.22436467449805775, + "loss": 1.2756130695343018, + "loss_ce": 0.00461688544601202, + "loss_iou": 0.5546875, + "loss_num": 0.03271484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 158747932, + "step": 2397 + }, + { + "epoch": 0.2244582767819535, + "grad_norm": 26.021142959594727, + "learning_rate": 5e-05, + "loss": 1.6453, + "num_input_tokens_seen": 158814508, + "step": 2398 + }, + { + "epoch": 0.2244582767819535, + "loss": 1.6735025644302368, + "loss_ce": 0.008463489823043346, + "loss_iou": 0.61328125, + "loss_num": 0.0869140625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 158814508, + "step": 2398 + }, + { + "epoch": 0.2245518790658492, + "grad_norm": 23.390729904174805, + "learning_rate": 5e-05, + "loss": 1.5187, + "num_input_tokens_seen": 158881304, + "step": 2399 + }, + { + "epoch": 0.2245518790658492, + "loss": 1.4685648679733276, + "loss_ce": 0.0037210932932794094, + "loss_iou": 0.62890625, + "loss_num": 0.042236328125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 158881304, + "step": 2399 + }, + { + "epoch": 0.22464548134974494, + "grad_norm": 124.52165222167969, + "learning_rate": 5e-05, + "loss": 1.3992, + "num_input_tokens_seen": 158948252, + "step": 2400 + }, + { + "epoch": 0.22464548134974494, + "loss": 1.2291803359985352, + "loss_ce": 0.003106124699115753, + "loss_iou": 0.55078125, + "loss_num": 0.0245361328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 158948252, + "step": 2400 + }, + { + "epoch": 0.22473908363364067, + "grad_norm": 18.20048713684082, + "learning_rate": 5e-05, + "loss": 1.5878, + "num_input_tokens_seen": 159013608, + "step": 2401 + }, + { + "epoch": 0.22473908363364067, + "loss": 1.5494269132614136, + "loss_ce": 0.011341002769768238, + "loss_iou": 0.6328125, + "loss_num": 0.0537109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 159013608, + "step": 2401 + }, + { + "epoch": 0.22483268591753638, + "grad_norm": 17.248010635375977, + "learning_rate": 5e-05, + "loss": 1.0412, + "num_input_tokens_seen": 159078748, + "step": 2402 + }, + { + "epoch": 0.22483268591753638, + "loss": 1.0647587776184082, + "loss_ce": 0.005676723085343838, + "loss_iou": 0.4296875, + "loss_num": 0.0400390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 159078748, + "step": 2402 + }, + { + "epoch": 0.22492628820143212, + "grad_norm": 21.244176864624023, + "learning_rate": 5e-05, + "loss": 1.3451, + "num_input_tokens_seen": 159145504, + "step": 2403 + }, + { + "epoch": 0.22492628820143212, + "loss": 1.3699020147323608, + "loss_ce": 0.0041793640702962875, + "loss_iou": 0.59375, + "loss_num": 0.035400390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 159145504, + "step": 2403 + }, + { + "epoch": 0.22501989048532783, + "grad_norm": 19.881961822509766, + "learning_rate": 5e-05, + "loss": 1.5705, + "num_input_tokens_seen": 159212348, + "step": 2404 + }, + { + "epoch": 0.22501989048532783, + "loss": 1.5467236042022705, + "loss_ce": 0.00570790795609355, + "loss_iou": 0.67578125, + "loss_num": 0.037353515625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 159212348, + "step": 2404 + }, + { + "epoch": 0.22511349276922357, + "grad_norm": 38.16630172729492, + "learning_rate": 5e-05, + "loss": 1.4168, + "num_input_tokens_seen": 159279100, + "step": 2405 + }, + { + "epoch": 0.22511349276922357, + "loss": 1.619866132736206, + "loss_ce": 0.00658475561067462, + "loss_iou": 0.6015625, + "loss_num": 0.0810546875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 159279100, + "step": 2405 + }, + { + "epoch": 0.22520709505311928, + "grad_norm": 19.63469123840332, + "learning_rate": 5e-05, + "loss": 1.7053, + "num_input_tokens_seen": 159345336, + "step": 2406 + }, + { + "epoch": 0.22520709505311928, + "loss": 1.769553780555725, + "loss_ce": 0.004905343055725098, + "loss_iou": 0.73828125, + "loss_num": 0.05810546875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 159345336, + "step": 2406 + }, + { + "epoch": 0.22530069733701502, + "grad_norm": 16.83295249938965, + "learning_rate": 5e-05, + "loss": 1.4323, + "num_input_tokens_seen": 159410568, + "step": 2407 + }, + { + "epoch": 0.22530069733701502, + "loss": 1.3493835926055908, + "loss_ce": 0.006121892482042313, + "loss_iou": 0.59375, + "loss_num": 0.031494140625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 159410568, + "step": 2407 + }, + { + "epoch": 0.22539429962091076, + "grad_norm": 23.202192306518555, + "learning_rate": 5e-05, + "loss": 1.4436, + "num_input_tokens_seen": 159477396, + "step": 2408 + }, + { + "epoch": 0.22539429962091076, + "loss": 1.3589022159576416, + "loss_ce": 0.0014803714584559202, + "loss_iou": 0.5703125, + "loss_num": 0.04248046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 159477396, + "step": 2408 + }, + { + "epoch": 0.22548790190480647, + "grad_norm": 33.11485290527344, + "learning_rate": 5e-05, + "loss": 1.2378, + "num_input_tokens_seen": 159543900, + "step": 2409 + }, + { + "epoch": 0.22548790190480647, + "loss": 1.1053540706634521, + "loss_ce": 0.0037914826534688473, + "loss_iou": 0.484375, + "loss_num": 0.0262451171875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 159543900, + "step": 2409 + }, + { + "epoch": 0.2255815041887022, + "grad_norm": 24.576881408691406, + "learning_rate": 5e-05, + "loss": 1.5728, + "num_input_tokens_seen": 159609492, + "step": 2410 + }, + { + "epoch": 0.2255815041887022, + "loss": 1.673002004623413, + "loss_ce": 0.006009810138493776, + "loss_iou": 0.68359375, + "loss_num": 0.06005859375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 159609492, + "step": 2410 + }, + { + "epoch": 0.22567510647259792, + "grad_norm": 10.936545372009277, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 159675948, + "step": 2411 + }, + { + "epoch": 0.22567510647259792, + "loss": 1.3140408992767334, + "loss_ce": 0.004958903882652521, + "loss_iou": 0.546875, + "loss_num": 0.043701171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 159675948, + "step": 2411 + }, + { + "epoch": 0.22576870875649366, + "grad_norm": 25.12563133239746, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 159742976, + "step": 2412 + }, + { + "epoch": 0.22576870875649366, + "loss": 1.296428918838501, + "loss_ce": 0.0015070445369929075, + "loss_iou": 0.53515625, + "loss_num": 0.044189453125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 159742976, + "step": 2412 + }, + { + "epoch": 0.2258623110403894, + "grad_norm": 19.09864044189453, + "learning_rate": 5e-05, + "loss": 1.5128, + "num_input_tokens_seen": 159809092, + "step": 2413 + }, + { + "epoch": 0.2258623110403894, + "loss": 1.62244713306427, + "loss_ce": 0.010142412967979908, + "loss_iou": 0.5859375, + "loss_num": 0.0888671875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 159809092, + "step": 2413 + }, + { + "epoch": 0.2259559133242851, + "grad_norm": 16.004119873046875, + "learning_rate": 5e-05, + "loss": 1.3332, + "num_input_tokens_seen": 159874516, + "step": 2414 + }, + { + "epoch": 0.2259559133242851, + "loss": 1.2717313766479492, + "loss_ce": 0.003664960153400898, + "loss_iou": 0.55078125, + "loss_num": 0.03271484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 159874516, + "step": 2414 + }, + { + "epoch": 0.22604951560818085, + "grad_norm": 31.018705368041992, + "learning_rate": 5e-05, + "loss": 1.3362, + "num_input_tokens_seen": 159941204, + "step": 2415 + }, + { + "epoch": 0.22604951560818085, + "loss": 1.4464552402496338, + "loss_ce": 0.004072476644068956, + "loss_iou": 0.61328125, + "loss_num": 0.04296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 159941204, + "step": 2415 + }, + { + "epoch": 0.22614311789207656, + "grad_norm": 37.567779541015625, + "learning_rate": 5e-05, + "loss": 1.3376, + "num_input_tokens_seen": 160007588, + "step": 2416 + }, + { + "epoch": 0.22614311789207656, + "loss": 1.5280895233154297, + "loss_ce": 0.003675463143736124, + "loss_iou": 0.6171875, + "loss_num": 0.058837890625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 160007588, + "step": 2416 + }, + { + "epoch": 0.2262367201759723, + "grad_norm": 23.152658462524414, + "learning_rate": 5e-05, + "loss": 1.6915, + "num_input_tokens_seen": 160073604, + "step": 2417 + }, + { + "epoch": 0.2262367201759723, + "loss": 1.6573272943496704, + "loss_ce": 0.004983613267540932, + "loss_iou": 0.671875, + "loss_num": 0.061279296875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 160073604, + "step": 2417 + }, + { + "epoch": 0.22633032245986803, + "grad_norm": 14.996236801147461, + "learning_rate": 5e-05, + "loss": 1.4899, + "num_input_tokens_seen": 160139604, + "step": 2418 + }, + { + "epoch": 0.22633032245986803, + "loss": 1.6897783279418945, + "loss_ce": 0.00471983477473259, + "loss_iou": 0.640625, + "loss_num": 0.080078125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 160139604, + "step": 2418 + }, + { + "epoch": 0.22642392474376374, + "grad_norm": 25.997526168823242, + "learning_rate": 5e-05, + "loss": 1.3758, + "num_input_tokens_seen": 160205468, + "step": 2419 + }, + { + "epoch": 0.22642392474376374, + "loss": 1.330578327178955, + "loss_ce": 0.004406413994729519, + "loss_iou": 0.53515625, + "loss_num": 0.051025390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 160205468, + "step": 2419 + }, + { + "epoch": 0.22651752702765948, + "grad_norm": 118.78583526611328, + "learning_rate": 5e-05, + "loss": 1.8314, + "num_input_tokens_seen": 160271812, + "step": 2420 + }, + { + "epoch": 0.22651752702765948, + "loss": 1.6269346475601196, + "loss_ce": 0.009502959437668324, + "loss_iou": 0.6484375, + "loss_num": 0.0634765625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 160271812, + "step": 2420 + }, + { + "epoch": 0.2266111293115552, + "grad_norm": 22.77692413330078, + "learning_rate": 5e-05, + "loss": 1.3515, + "num_input_tokens_seen": 160337740, + "step": 2421 + }, + { + "epoch": 0.2266111293115552, + "loss": 1.341922402381897, + "loss_ce": 0.0084262415766716, + "loss_iou": 0.5234375, + "loss_num": 0.057373046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 160337740, + "step": 2421 + }, + { + "epoch": 0.22670473159545093, + "grad_norm": 40.46633529663086, + "learning_rate": 5e-05, + "loss": 1.569, + "num_input_tokens_seen": 160405040, + "step": 2422 + }, + { + "epoch": 0.22670473159545093, + "loss": 1.482072353363037, + "loss_ce": 0.006486350670456886, + "loss_iou": 0.5859375, + "loss_num": 0.06005859375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 160405040, + "step": 2422 + }, + { + "epoch": 0.22679833387934667, + "grad_norm": 24.294353485107422, + "learning_rate": 5e-05, + "loss": 1.6327, + "num_input_tokens_seen": 160469804, + "step": 2423 + }, + { + "epoch": 0.22679833387934667, + "loss": 1.397963047027588, + "loss_ce": 0.004896578378975391, + "loss_iou": 0.54296875, + "loss_num": 0.0615234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 160469804, + "step": 2423 + }, + { + "epoch": 0.22689193616324238, + "grad_norm": 27.036087036132812, + "learning_rate": 5e-05, + "loss": 1.1462, + "num_input_tokens_seen": 160535412, + "step": 2424 + }, + { + "epoch": 0.22689193616324238, + "loss": 1.3932316303253174, + "loss_ce": 0.006024546921253204, + "loss_iou": 0.56640625, + "loss_num": 0.05126953125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 160535412, + "step": 2424 + }, + { + "epoch": 0.22698553844713812, + "grad_norm": 41.639774322509766, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 160601904, + "step": 2425 + }, + { + "epoch": 0.22698553844713812, + "loss": 1.5586919784545898, + "loss_ce": 0.006934266537427902, + "loss_iou": 0.63671875, + "loss_num": 0.05615234375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 160601904, + "step": 2425 + }, + { + "epoch": 0.22707914073103383, + "grad_norm": 263.73138427734375, + "learning_rate": 5e-05, + "loss": 1.5827, + "num_input_tokens_seen": 160668892, + "step": 2426 + }, + { + "epoch": 0.22707914073103383, + "loss": 1.6234040260314941, + "loss_ce": 0.0023101852275431156, + "loss_iou": 0.6953125, + "loss_num": 0.046630859375, + "loss_xval": 1.625, + "num_input_tokens_seen": 160668892, + "step": 2426 + }, + { + "epoch": 0.22717274301492957, + "grad_norm": 19.2490291595459, + "learning_rate": 5e-05, + "loss": 1.1537, + "num_input_tokens_seen": 160736020, + "step": 2427 + }, + { + "epoch": 0.22717274301492957, + "loss": 0.9505199790000916, + "loss_ce": 0.0027661044150590897, + "loss_iou": 0.421875, + "loss_num": 0.0208740234375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 160736020, + "step": 2427 + }, + { + "epoch": 0.22726634529882528, + "grad_norm": 22.74749755859375, + "learning_rate": 5e-05, + "loss": 1.396, + "num_input_tokens_seen": 160802752, + "step": 2428 + }, + { + "epoch": 0.22726634529882528, + "loss": 1.4164843559265137, + "loss_ce": 0.005351502448320389, + "loss_iou": 0.5859375, + "loss_num": 0.04736328125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 160802752, + "step": 2428 + }, + { + "epoch": 0.22735994758272102, + "grad_norm": 23.023794174194336, + "learning_rate": 5e-05, + "loss": 1.3875, + "num_input_tokens_seen": 160869360, + "step": 2429 + }, + { + "epoch": 0.22735994758272102, + "loss": 1.240903377532959, + "loss_ce": 0.007260720245540142, + "loss_iou": 0.4765625, + "loss_num": 0.0556640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 160869360, + "step": 2429 + }, + { + "epoch": 0.22745354986661676, + "grad_norm": 31.687755584716797, + "learning_rate": 5e-05, + "loss": 1.4588, + "num_input_tokens_seen": 160934984, + "step": 2430 + }, + { + "epoch": 0.22745354986661676, + "loss": 1.3200249671936035, + "loss_ce": 0.00435119541361928, + "loss_iou": 0.5390625, + "loss_num": 0.0478515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 160934984, + "step": 2430 + }, + { + "epoch": 0.22754715215051247, + "grad_norm": 19.517658233642578, + "learning_rate": 5e-05, + "loss": 1.5534, + "num_input_tokens_seen": 160999604, + "step": 2431 + }, + { + "epoch": 0.22754715215051247, + "loss": 1.630081295967102, + "loss_ce": 0.002151642693206668, + "loss_iou": 0.6953125, + "loss_num": 0.0478515625, + "loss_xval": 1.625, + "num_input_tokens_seen": 160999604, + "step": 2431 + }, + { + "epoch": 0.2276407544344082, + "grad_norm": 18.604236602783203, + "learning_rate": 5e-05, + "loss": 1.3738, + "num_input_tokens_seen": 161065536, + "step": 2432 + }, + { + "epoch": 0.2276407544344082, + "loss": 1.4849238395690918, + "loss_ce": 0.005492770113050938, + "loss_iou": 0.54296875, + "loss_num": 0.07763671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 161065536, + "step": 2432 + }, + { + "epoch": 0.22773435671830392, + "grad_norm": 13.682656288146973, + "learning_rate": 5e-05, + "loss": 1.3782, + "num_input_tokens_seen": 161132252, + "step": 2433 + }, + { + "epoch": 0.22773435671830392, + "loss": 1.3550634384155273, + "loss_ce": 0.010092698037624359, + "loss_iou": 0.5625, + "loss_num": 0.044189453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 161132252, + "step": 2433 + }, + { + "epoch": 0.22782795900219965, + "grad_norm": 22.1313419342041, + "learning_rate": 5e-05, + "loss": 1.4224, + "num_input_tokens_seen": 161199688, + "step": 2434 + }, + { + "epoch": 0.22782795900219965, + "loss": 1.3534135818481445, + "loss_ce": 0.0013627284206449986, + "loss_iou": 0.515625, + "loss_num": 0.064453125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 161199688, + "step": 2434 + }, + { + "epoch": 0.2279215612860954, + "grad_norm": 17.397193908691406, + "learning_rate": 5e-05, + "loss": 1.4757, + "num_input_tokens_seen": 161265896, + "step": 2435 + }, + { + "epoch": 0.2279215612860954, + "loss": 1.8262516260147095, + "loss_ce": 0.00984534528106451, + "loss_iou": 0.71875, + "loss_num": 0.07666015625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 161265896, + "step": 2435 + }, + { + "epoch": 0.2280151635699911, + "grad_norm": 90.07242584228516, + "learning_rate": 5e-05, + "loss": 1.3546, + "num_input_tokens_seen": 161333660, + "step": 2436 + }, + { + "epoch": 0.2280151635699911, + "loss": 1.1518394947052002, + "loss_ce": 0.003402027767151594, + "loss_iou": 0.494140625, + "loss_num": 0.0322265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 161333660, + "step": 2436 + }, + { + "epoch": 0.22810876585388684, + "grad_norm": 21.776294708251953, + "learning_rate": 5e-05, + "loss": 1.3239, + "num_input_tokens_seen": 161398960, + "step": 2437 + }, + { + "epoch": 0.22810876585388684, + "loss": 1.1618947982788086, + "loss_ce": 0.006865587085485458, + "loss_iou": 0.48828125, + "loss_num": 0.03564453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 161398960, + "step": 2437 + }, + { + "epoch": 0.22820236813778255, + "grad_norm": 38.253658294677734, + "learning_rate": 5e-05, + "loss": 1.5948, + "num_input_tokens_seen": 161465236, + "step": 2438 + }, + { + "epoch": 0.22820236813778255, + "loss": 1.6474385261535645, + "loss_ce": 0.0038838572800159454, + "loss_iou": 0.6640625, + "loss_num": 0.06298828125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 161465236, + "step": 2438 + }, + { + "epoch": 0.2282959704216783, + "grad_norm": 35.7276611328125, + "learning_rate": 5e-05, + "loss": 1.5618, + "num_input_tokens_seen": 161531424, + "step": 2439 + }, + { + "epoch": 0.2282959704216783, + "loss": 1.5822123289108276, + "loss_ce": 0.0045755887404084206, + "loss_iou": 0.625, + "loss_num": 0.0654296875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 161531424, + "step": 2439 + }, + { + "epoch": 0.22838957270557403, + "grad_norm": 29.47515106201172, + "learning_rate": 5e-05, + "loss": 1.5989, + "num_input_tokens_seen": 161597336, + "step": 2440 + }, + { + "epoch": 0.22838957270557403, + "loss": 1.7137210369110107, + "loss_ce": 0.006201460026204586, + "loss_iou": 0.703125, + "loss_num": 0.059326171875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 161597336, + "step": 2440 + }, + { + "epoch": 0.22848317498946974, + "grad_norm": 31.88434600830078, + "learning_rate": 5e-05, + "loss": 1.2366, + "num_input_tokens_seen": 161662992, + "step": 2441 + }, + { + "epoch": 0.22848317498946974, + "loss": 1.174156904220581, + "loss_ce": 0.007408848498016596, + "loss_iou": 0.421875, + "loss_num": 0.06494140625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 161662992, + "step": 2441 + }, + { + "epoch": 0.22857677727336548, + "grad_norm": 17.222997665405273, + "learning_rate": 5e-05, + "loss": 1.5757, + "num_input_tokens_seen": 161729752, + "step": 2442 + }, + { + "epoch": 0.22857677727336548, + "loss": 1.7142722606658936, + "loss_ce": 0.007241016253829002, + "loss_iou": 0.75, + "loss_num": 0.04150390625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 161729752, + "step": 2442 + }, + { + "epoch": 0.2286703795572612, + "grad_norm": 23.34244155883789, + "learning_rate": 5e-05, + "loss": 1.4665, + "num_input_tokens_seen": 161796252, + "step": 2443 + }, + { + "epoch": 0.2286703795572612, + "loss": 1.3133584260940552, + "loss_ce": 0.006717763841152191, + "loss_iou": 0.546875, + "loss_num": 0.041748046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 161796252, + "step": 2443 + }, + { + "epoch": 0.22876398184115693, + "grad_norm": 37.789886474609375, + "learning_rate": 5e-05, + "loss": 1.416, + "num_input_tokens_seen": 161863252, + "step": 2444 + }, + { + "epoch": 0.22876398184115693, + "loss": 1.38625168800354, + "loss_ce": 0.006857120431959629, + "loss_iou": 0.56640625, + "loss_num": 0.048583984375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 161863252, + "step": 2444 + }, + { + "epoch": 0.22885758412505264, + "grad_norm": 19.85990333557129, + "learning_rate": 5e-05, + "loss": 1.5014, + "num_input_tokens_seen": 161929072, + "step": 2445 + }, + { + "epoch": 0.22885758412505264, + "loss": 1.527203917503357, + "loss_ce": 0.0027899290435016155, + "loss_iou": 0.62890625, + "loss_num": 0.0537109375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 161929072, + "step": 2445 + }, + { + "epoch": 0.22895118640894838, + "grad_norm": 21.449256896972656, + "learning_rate": 5e-05, + "loss": 1.4077, + "num_input_tokens_seen": 161995272, + "step": 2446 + }, + { + "epoch": 0.22895118640894838, + "loss": 1.3202686309814453, + "loss_ce": 0.0019092089496552944, + "loss_iou": 0.5703125, + "loss_num": 0.0361328125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 161995272, + "step": 2446 + }, + { + "epoch": 0.22904478869284411, + "grad_norm": 18.70505142211914, + "learning_rate": 5e-05, + "loss": 1.3138, + "num_input_tokens_seen": 162061728, + "step": 2447 + }, + { + "epoch": 0.22904478869284411, + "loss": 1.2926361560821533, + "loss_ce": 0.01138614397495985, + "loss_iou": 0.5390625, + "loss_num": 0.0400390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 162061728, + "step": 2447 + }, + { + "epoch": 0.22913839097673983, + "grad_norm": 25.62189292907715, + "learning_rate": 5e-05, + "loss": 1.5155, + "num_input_tokens_seen": 162129428, + "step": 2448 + }, + { + "epoch": 0.22913839097673983, + "loss": 1.5001521110534668, + "loss_ce": 0.002593392040580511, + "loss_iou": 0.62109375, + "loss_num": 0.052001953125, + "loss_xval": 1.5, + "num_input_tokens_seen": 162129428, + "step": 2448 + }, + { + "epoch": 0.22923199326063556, + "grad_norm": 33.32771301269531, + "learning_rate": 5e-05, + "loss": 1.5063, + "num_input_tokens_seen": 162196064, + "step": 2449 + }, + { + "epoch": 0.22923199326063556, + "loss": 1.4936645030975342, + "loss_ce": 0.004895005375146866, + "loss_iou": 0.609375, + "loss_num": 0.0546875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 162196064, + "step": 2449 + }, + { + "epoch": 0.22932559554453127, + "grad_norm": 20.925540924072266, + "learning_rate": 5e-05, + "loss": 1.6122, + "num_input_tokens_seen": 162263060, + "step": 2450 + }, + { + "epoch": 0.22932559554453127, + "loss": 1.573733925819397, + "loss_ce": 0.007327648811042309, + "loss_iou": 0.6796875, + "loss_num": 0.04052734375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 162263060, + "step": 2450 + }, + { + "epoch": 0.229419197828427, + "grad_norm": 17.177642822265625, + "learning_rate": 5e-05, + "loss": 1.3064, + "num_input_tokens_seen": 162329316, + "step": 2451 + }, + { + "epoch": 0.229419197828427, + "loss": 1.2348629236221313, + "loss_ce": 0.00732386251911521, + "loss_iou": 0.443359375, + "loss_num": 0.06787109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 162329316, + "step": 2451 + }, + { + "epoch": 0.22951280011232275, + "grad_norm": 28.06122398376465, + "learning_rate": 5e-05, + "loss": 1.3107, + "num_input_tokens_seen": 162395300, + "step": 2452 + }, + { + "epoch": 0.22951280011232275, + "loss": 1.3751388788223267, + "loss_ce": 0.0040450915694236755, + "loss_iou": 0.56640625, + "loss_num": 0.04833984375, + "loss_xval": 1.375, + "num_input_tokens_seen": 162395300, + "step": 2452 + }, + { + "epoch": 0.22960640239621846, + "grad_norm": 22.68967056274414, + "learning_rate": 5e-05, + "loss": 1.6941, + "num_input_tokens_seen": 162461892, + "step": 2453 + }, + { + "epoch": 0.22960640239621846, + "loss": 1.5317165851593018, + "loss_ce": 0.0029080549720674753, + "loss_iou": 0.63671875, + "loss_num": 0.050537109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 162461892, + "step": 2453 + }, + { + "epoch": 0.2297000046801142, + "grad_norm": 26.86604881286621, + "learning_rate": 5e-05, + "loss": 1.2431, + "num_input_tokens_seen": 162527000, + "step": 2454 + }, + { + "epoch": 0.2297000046801142, + "loss": 1.3855384588241577, + "loss_ce": 0.0052894484251737595, + "loss_iou": 0.53125, + "loss_num": 0.06298828125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 162527000, + "step": 2454 + }, + { + "epoch": 0.2297936069640099, + "grad_norm": 23.77687644958496, + "learning_rate": 5e-05, + "loss": 1.4307, + "num_input_tokens_seen": 162593708, + "step": 2455 + }, + { + "epoch": 0.2297936069640099, + "loss": 1.532151222229004, + "loss_ce": 0.005784048233181238, + "loss_iou": 0.6328125, + "loss_num": 0.052001953125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 162593708, + "step": 2455 + }, + { + "epoch": 0.22988720924790565, + "grad_norm": 27.570907592773438, + "learning_rate": 5e-05, + "loss": 1.1868, + "num_input_tokens_seen": 162659100, + "step": 2456 + }, + { + "epoch": 0.22988720924790565, + "loss": 1.067798376083374, + "loss_ce": 0.005298304371535778, + "loss_iou": 0.451171875, + "loss_num": 0.031982421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 162659100, + "step": 2456 + }, + { + "epoch": 0.2299808115318014, + "grad_norm": 25.42987632751465, + "learning_rate": 5e-05, + "loss": 1.4613, + "num_input_tokens_seen": 162725040, + "step": 2457 + }, + { + "epoch": 0.2299808115318014, + "loss": 1.4514778852462769, + "loss_ce": 0.006165330298244953, + "loss_iou": 0.59375, + "loss_num": 0.05224609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 162725040, + "step": 2457 + }, + { + "epoch": 0.2300744138156971, + "grad_norm": 32.58747863769531, + "learning_rate": 5e-05, + "loss": 1.2337, + "num_input_tokens_seen": 162790956, + "step": 2458 + }, + { + "epoch": 0.2300744138156971, + "loss": 1.39103364944458, + "loss_ce": 0.0067563047632575035, + "loss_iou": 0.55859375, + "loss_num": 0.052734375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 162790956, + "step": 2458 + }, + { + "epoch": 0.23016801609959284, + "grad_norm": 68.71968841552734, + "learning_rate": 5e-05, + "loss": 1.4733, + "num_input_tokens_seen": 162857136, + "step": 2459 + }, + { + "epoch": 0.23016801609959284, + "loss": 1.5733240842819214, + "loss_ce": 0.006429579574614763, + "loss_iou": 0.6640625, + "loss_num": 0.04833984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 162857136, + "step": 2459 + }, + { + "epoch": 0.23026161838348855, + "grad_norm": 43.434669494628906, + "learning_rate": 5e-05, + "loss": 1.3512, + "num_input_tokens_seen": 162923532, + "step": 2460 + }, + { + "epoch": 0.23026161838348855, + "loss": 1.428882122039795, + "loss_ce": 0.006030574440956116, + "loss_iou": 0.51953125, + "loss_num": 0.0771484375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 162923532, + "step": 2460 + }, + { + "epoch": 0.2303552206673843, + "grad_norm": 12.635337829589844, + "learning_rate": 5e-05, + "loss": 1.3098, + "num_input_tokens_seen": 162988940, + "step": 2461 + }, + { + "epoch": 0.2303552206673843, + "loss": 1.4358150959014893, + "loss_ce": 0.006982141640037298, + "loss_iou": 0.5859375, + "loss_num": 0.05224609375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 162988940, + "step": 2461 + }, + { + "epoch": 0.23044882295128002, + "grad_norm": 20.529218673706055, + "learning_rate": 5e-05, + "loss": 1.1557, + "num_input_tokens_seen": 163055324, + "step": 2462 + }, + { + "epoch": 0.23044882295128002, + "loss": 1.2897112369537354, + "loss_ce": 0.005043351091444492, + "loss_iou": 0.5, + "loss_num": 0.056640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 163055324, + "step": 2462 + }, + { + "epoch": 0.23054242523517574, + "grad_norm": 138.20254516601562, + "learning_rate": 5e-05, + "loss": 1.2349, + "num_input_tokens_seen": 163121024, + "step": 2463 + }, + { + "epoch": 0.23054242523517574, + "loss": 1.0889167785644531, + "loss_ce": 0.0020027703139930964, + "loss_iou": 0.4453125, + "loss_num": 0.03857421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 163121024, + "step": 2463 + }, + { + "epoch": 0.23063602751907147, + "grad_norm": 49.121238708496094, + "learning_rate": 5e-05, + "loss": 1.4, + "num_input_tokens_seen": 163188208, + "step": 2464 + }, + { + "epoch": 0.23063602751907147, + "loss": 1.3350986242294312, + "loss_ce": 0.00453227711841464, + "loss_iou": 0.57421875, + "loss_num": 0.035888671875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 163188208, + "step": 2464 + }, + { + "epoch": 0.23072962980296718, + "grad_norm": 19.054418563842773, + "learning_rate": 5e-05, + "loss": 1.4856, + "num_input_tokens_seen": 163254636, + "step": 2465 + }, + { + "epoch": 0.23072962980296718, + "loss": 1.639803171157837, + "loss_ce": 0.005037473049014807, + "loss_iou": 0.71875, + "loss_num": 0.0390625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 163254636, + "step": 2465 + }, + { + "epoch": 0.23082323208686292, + "grad_norm": 18.13214683532715, + "learning_rate": 5e-05, + "loss": 1.3198, + "num_input_tokens_seen": 163320572, + "step": 2466 + }, + { + "epoch": 0.23082323208686292, + "loss": 1.2554048299789429, + "loss_ce": 0.0063813598826527596, + "loss_iou": 0.51171875, + "loss_num": 0.045166015625, + "loss_xval": 1.25, + "num_input_tokens_seen": 163320572, + "step": 2466 + }, + { + "epoch": 0.23091683437075863, + "grad_norm": 24.170122146606445, + "learning_rate": 5e-05, + "loss": 1.5222, + "num_input_tokens_seen": 163387168, + "step": 2467 + }, + { + "epoch": 0.23091683437075863, + "loss": 1.4282277822494507, + "loss_ce": 0.0024465336464345455, + "loss_iou": 0.5859375, + "loss_num": 0.05078125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 163387168, + "step": 2467 + }, + { + "epoch": 0.23101043665465437, + "grad_norm": 21.741044998168945, + "learning_rate": 5e-05, + "loss": 1.7628, + "num_input_tokens_seen": 163453236, + "step": 2468 + }, + { + "epoch": 0.23101043665465437, + "loss": 1.7581052780151367, + "loss_ce": 0.002245889976620674, + "loss_iou": 0.6953125, + "loss_num": 0.07373046875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 163453236, + "step": 2468 + }, + { + "epoch": 0.2311040389385501, + "grad_norm": 17.663761138916016, + "learning_rate": 5e-05, + "loss": 1.4071, + "num_input_tokens_seen": 163520240, + "step": 2469 + }, + { + "epoch": 0.2311040389385501, + "loss": 1.6644530296325684, + "loss_ce": 0.006249843165278435, + "loss_iou": 0.64453125, + "loss_num": 0.0732421875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 163520240, + "step": 2469 + }, + { + "epoch": 0.23119764122244582, + "grad_norm": 22.036527633666992, + "learning_rate": 5e-05, + "loss": 1.2826, + "num_input_tokens_seen": 163586436, + "step": 2470 + }, + { + "epoch": 0.23119764122244582, + "loss": 1.221808671951294, + "loss_ce": 0.0015938271535560489, + "loss_iou": 0.546875, + "loss_num": 0.02490234375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 163586436, + "step": 2470 + }, + { + "epoch": 0.23129124350634156, + "grad_norm": 18.36457061767578, + "learning_rate": 5e-05, + "loss": 1.3659, + "num_input_tokens_seen": 163652996, + "step": 2471 + }, + { + "epoch": 0.23129124350634156, + "loss": 1.5381481647491455, + "loss_ce": 0.005433372221887112, + "loss_iou": 0.62109375, + "loss_num": 0.057861328125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 163652996, + "step": 2471 + }, + { + "epoch": 0.23138484579023727, + "grad_norm": 28.031639099121094, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 163719344, + "step": 2472 + }, + { + "epoch": 0.23138484579023727, + "loss": 1.3550891876220703, + "loss_ce": 0.006944580003619194, + "loss_iou": 0.55859375, + "loss_num": 0.046630859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 163719344, + "step": 2472 + }, + { + "epoch": 0.231478448074133, + "grad_norm": 27.000850677490234, + "learning_rate": 5e-05, + "loss": 1.6887, + "num_input_tokens_seen": 163784896, + "step": 2473 + }, + { + "epoch": 0.231478448074133, + "loss": 2.0026726722717285, + "loss_ce": 0.007555382791906595, + "loss_iou": 0.75, + "loss_num": 0.09814453125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 163784896, + "step": 2473 + }, + { + "epoch": 0.23157205035802875, + "grad_norm": 31.808427810668945, + "learning_rate": 5e-05, + "loss": 1.4269, + "num_input_tokens_seen": 163850464, + "step": 2474 + }, + { + "epoch": 0.23157205035802875, + "loss": 1.3507449626922607, + "loss_ce": 0.006994950119405985, + "loss_iou": 0.50390625, + "loss_num": 0.0673828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 163850464, + "step": 2474 + }, + { + "epoch": 0.23166565264192446, + "grad_norm": 28.826396942138672, + "learning_rate": 5e-05, + "loss": 1.5188, + "num_input_tokens_seen": 163917052, + "step": 2475 + }, + { + "epoch": 0.23166565264192446, + "loss": 1.6371958255767822, + "loss_ce": 0.0014536024536937475, + "loss_iou": 0.6796875, + "loss_num": 0.0546875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 163917052, + "step": 2475 + }, + { + "epoch": 0.2317592549258202, + "grad_norm": 37.960697174072266, + "learning_rate": 5e-05, + "loss": 1.5083, + "num_input_tokens_seen": 163984472, + "step": 2476 + }, + { + "epoch": 0.2317592549258202, + "loss": 1.3793046474456787, + "loss_ce": 0.0028398879803717136, + "loss_iou": 0.56640625, + "loss_num": 0.048095703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 163984472, + "step": 2476 + }, + { + "epoch": 0.2318528572097159, + "grad_norm": 20.171327590942383, + "learning_rate": 5e-05, + "loss": 1.6516, + "num_input_tokens_seen": 164049132, + "step": 2477 + }, + { + "epoch": 0.2318528572097159, + "loss": 1.6791355609893799, + "loss_ce": 0.0023777266032993793, + "loss_iou": 0.70703125, + "loss_num": 0.052978515625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 164049132, + "step": 2477 + }, + { + "epoch": 0.23194645949361165, + "grad_norm": 16.24278450012207, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 164115196, + "step": 2478 + }, + { + "epoch": 0.23194645949361165, + "loss": 1.1775946617126465, + "loss_ce": 0.0057196225970983505, + "loss_iou": 0.498046875, + "loss_num": 0.035400390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 164115196, + "step": 2478 + }, + { + "epoch": 0.23204006177750738, + "grad_norm": 17.705053329467773, + "learning_rate": 5e-05, + "loss": 1.4433, + "num_input_tokens_seen": 164181904, + "step": 2479 + }, + { + "epoch": 0.23204006177750738, + "loss": 1.3429597616195679, + "loss_ce": 0.003115981351584196, + "loss_iou": 0.53515625, + "loss_num": 0.0537109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 164181904, + "step": 2479 + }, + { + "epoch": 0.2321336640614031, + "grad_norm": 35.55115509033203, + "learning_rate": 5e-05, + "loss": 1.4613, + "num_input_tokens_seen": 164247788, + "step": 2480 + }, + { + "epoch": 0.2321336640614031, + "loss": 1.6351828575134277, + "loss_ce": 0.003347002901136875, + "loss_iou": 0.6796875, + "loss_num": 0.05517578125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 164247788, + "step": 2480 + }, + { + "epoch": 0.23222726634529883, + "grad_norm": 24.525876998901367, + "learning_rate": 5e-05, + "loss": 1.6514, + "num_input_tokens_seen": 164314020, + "step": 2481 + }, + { + "epoch": 0.23222726634529883, + "loss": 1.6250824928283691, + "loss_ce": 0.004476991947740316, + "loss_iou": 0.68359375, + "loss_num": 0.050537109375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 164314020, + "step": 2481 + }, + { + "epoch": 0.23232086862919454, + "grad_norm": 23.42792320251465, + "learning_rate": 5e-05, + "loss": 1.5881, + "num_input_tokens_seen": 164379820, + "step": 2482 + }, + { + "epoch": 0.23232086862919454, + "loss": 1.5978208780288696, + "loss_ce": 0.007000564597547054, + "loss_iou": 0.58984375, + "loss_num": 0.08203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 164379820, + "step": 2482 + }, + { + "epoch": 0.23241447091309028, + "grad_norm": 24.587223052978516, + "learning_rate": 5e-05, + "loss": 1.5849, + "num_input_tokens_seen": 164446628, + "step": 2483 + }, + { + "epoch": 0.23241447091309028, + "loss": 1.4252188205718994, + "loss_ce": 0.0033439500257372856, + "loss_iou": 0.59765625, + "loss_num": 0.045654296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 164446628, + "step": 2483 + }, + { + "epoch": 0.232508073196986, + "grad_norm": 37.54349136352539, + "learning_rate": 5e-05, + "loss": 1.6122, + "num_input_tokens_seen": 164512932, + "step": 2484 + }, + { + "epoch": 0.232508073196986, + "loss": 1.735964298248291, + "loss_ce": 0.00744863785803318, + "loss_iou": 0.703125, + "loss_num": 0.064453125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 164512932, + "step": 2484 + }, + { + "epoch": 0.23260167548088173, + "grad_norm": 31.54380226135254, + "learning_rate": 5e-05, + "loss": 1.7512, + "num_input_tokens_seen": 164579648, + "step": 2485 + }, + { + "epoch": 0.23260167548088173, + "loss": 1.6679766178131104, + "loss_ce": 0.004890634212642908, + "loss_iou": 0.6875, + "loss_num": 0.056884765625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 164579648, + "step": 2485 + }, + { + "epoch": 0.23269527776477747, + "grad_norm": 30.32734489440918, + "learning_rate": 5e-05, + "loss": 1.6871, + "num_input_tokens_seen": 164646276, + "step": 2486 + }, + { + "epoch": 0.23269527776477747, + "loss": 1.61893892288208, + "loss_ce": 0.006634257733821869, + "loss_iou": 0.62890625, + "loss_num": 0.0703125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 164646276, + "step": 2486 + }, + { + "epoch": 0.23278888004867318, + "grad_norm": 19.408811569213867, + "learning_rate": 5e-05, + "loss": 1.402, + "num_input_tokens_seen": 164711348, + "step": 2487 + }, + { + "epoch": 0.23278888004867318, + "loss": 1.409168004989624, + "loss_ce": 0.00743465218693018, + "loss_iou": 0.54296875, + "loss_num": 0.06298828125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 164711348, + "step": 2487 + }, + { + "epoch": 0.23288248233256892, + "grad_norm": 25.209152221679688, + "learning_rate": 5e-05, + "loss": 1.3289, + "num_input_tokens_seen": 164777464, + "step": 2488 + }, + { + "epoch": 0.23288248233256892, + "loss": 1.4044851064682007, + "loss_ce": 0.006535861641168594, + "loss_iou": 0.5859375, + "loss_num": 0.044677734375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 164777464, + "step": 2488 + }, + { + "epoch": 0.23297608461646463, + "grad_norm": 59.48554611206055, + "learning_rate": 5e-05, + "loss": 1.4737, + "num_input_tokens_seen": 164845024, + "step": 2489 + }, + { + "epoch": 0.23297608461646463, + "loss": 1.4949105978012085, + "loss_ce": 0.00272312480956316, + "loss_iou": 0.64453125, + "loss_num": 0.040771484375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 164845024, + "step": 2489 + }, + { + "epoch": 0.23306968690036037, + "grad_norm": 14.618805885314941, + "learning_rate": 5e-05, + "loss": 1.2474, + "num_input_tokens_seen": 164911024, + "step": 2490 + }, + { + "epoch": 0.23306968690036037, + "loss": 1.3296984434127808, + "loss_ce": 0.00401481706649065, + "loss_iou": 0.578125, + "loss_num": 0.034423828125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 164911024, + "step": 2490 + }, + { + "epoch": 0.2331632891842561, + "grad_norm": 39.86266326904297, + "learning_rate": 5e-05, + "loss": 1.2735, + "num_input_tokens_seen": 164978628, + "step": 2491 + }, + { + "epoch": 0.2331632891842561, + "loss": 1.2402257919311523, + "loss_ce": 0.0019445550860837102, + "loss_iou": 0.5390625, + "loss_num": 0.03271484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 164978628, + "step": 2491 + }, + { + "epoch": 0.23325689146815182, + "grad_norm": 47.401302337646484, + "learning_rate": 5e-05, + "loss": 1.3271, + "num_input_tokens_seen": 165046160, + "step": 2492 + }, + { + "epoch": 0.23325689146815182, + "loss": 1.4952867031097412, + "loss_ce": 0.0065172468312084675, + "loss_iou": 0.61328125, + "loss_num": 0.052978515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 165046160, + "step": 2492 + }, + { + "epoch": 0.23335049375204756, + "grad_norm": 39.57675552368164, + "learning_rate": 5e-05, + "loss": 1.4569, + "num_input_tokens_seen": 165112804, + "step": 2493 + }, + { + "epoch": 0.23335049375204756, + "loss": 1.4527122974395752, + "loss_ce": 0.002516983076930046, + "loss_iou": 0.65234375, + "loss_num": 0.0286865234375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 165112804, + "step": 2493 + }, + { + "epoch": 0.23344409603594327, + "grad_norm": 28.581764221191406, + "learning_rate": 5e-05, + "loss": 1.2105, + "num_input_tokens_seen": 165178208, + "step": 2494 + }, + { + "epoch": 0.23344409603594327, + "loss": 1.2816894054412842, + "loss_ce": 0.004833861254155636, + "loss_iou": 0.490234375, + "loss_num": 0.059814453125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 165178208, + "step": 2494 + }, + { + "epoch": 0.233537698319839, + "grad_norm": 37.20583724975586, + "learning_rate": 5e-05, + "loss": 1.3242, + "num_input_tokens_seen": 165244368, + "step": 2495 + }, + { + "epoch": 0.233537698319839, + "loss": 1.6345446109771729, + "loss_ce": 0.005638356786221266, + "loss_iou": 0.6640625, + "loss_num": 0.059814453125, + "loss_xval": 1.625, + "num_input_tokens_seen": 165244368, + "step": 2495 + }, + { + "epoch": 0.23363130060373474, + "grad_norm": 36.57848358154297, + "learning_rate": 5e-05, + "loss": 1.2767, + "num_input_tokens_seen": 165309484, + "step": 2496 + }, + { + "epoch": 0.23363130060373474, + "loss": 1.1039924621582031, + "loss_ce": 0.005237653851509094, + "loss_iou": 0.43359375, + "loss_num": 0.0458984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 165309484, + "step": 2496 + }, + { + "epoch": 0.23372490288763045, + "grad_norm": 21.84840965270996, + "learning_rate": 5e-05, + "loss": 1.4515, + "num_input_tokens_seen": 165375508, + "step": 2497 + }, + { + "epoch": 0.23372490288763045, + "loss": 1.5355206727981567, + "loss_ce": 0.0062238117679953575, + "loss_iou": 0.63671875, + "loss_num": 0.051025390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 165375508, + "step": 2497 + }, + { + "epoch": 0.2338185051715262, + "grad_norm": 25.96573829650879, + "learning_rate": 5e-05, + "loss": 1.0797, + "num_input_tokens_seen": 165442652, + "step": 2498 + }, + { + "epoch": 0.2338185051715262, + "loss": 0.9413748979568481, + "loss_ce": 0.002898360835388303, + "loss_iou": 0.3828125, + "loss_num": 0.03466796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 165442652, + "step": 2498 + }, + { + "epoch": 0.2339121074554219, + "grad_norm": 20.81844139099121, + "learning_rate": 5e-05, + "loss": 1.321, + "num_input_tokens_seen": 165508484, + "step": 2499 + }, + { + "epoch": 0.2339121074554219, + "loss": 1.4115042686462402, + "loss_ce": 0.0052542174234986305, + "loss_iou": 0.59375, + "loss_num": 0.04443359375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 165508484, + "step": 2499 + }, + { + "epoch": 0.23400570973931764, + "grad_norm": 106.0998764038086, + "learning_rate": 5e-05, + "loss": 1.2157, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "eval_seeclick_CIoU": 0.1758987456560135, + "eval_seeclick_GIoU": 0.1970961093902588, + "eval_seeclick_IoU": 0.285983145236969, + "eval_seeclick_MAE_all": 0.15556222945451736, + "eval_seeclick_MAE_h": 0.11707764118909836, + "eval_seeclick_MAE_w": 0.12314826250076294, + "eval_seeclick_MAE_x_boxes": 0.20792122185230255, + "eval_seeclick_MAE_y_boxes": 0.12664693593978882, + "eval_seeclick_NUM_probability": 0.9996010661125183, + "eval_seeclick_inside_bbox": 0.48750001192092896, + "eval_seeclick_loss": 2.4451475143432617, + "eval_seeclick_loss_ce": 0.014546331018209457, + "eval_seeclick_loss_iou": 0.839599609375, + "eval_seeclick_loss_num": 0.1605987548828125, + "eval_seeclick_loss_xval": 2.479736328125, + "eval_seeclick_runtime": 70.2802, + "eval_seeclick_samples_per_second": 0.669, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "eval_icons_CIoU": -0.07744761649519205, + "eval_icons_GIoU": 0.010248284786939621, + "eval_icons_IoU": 0.10521091893315315, + "eval_icons_MAE_all": 0.1695949211716652, + "eval_icons_MAE_h": 0.09764321148395538, + "eval_icons_MAE_w": 0.20428355783224106, + "eval_icons_MAE_x_boxes": 0.13130392134189606, + "eval_icons_MAE_y_boxes": 0.10688390210270882, + "eval_icons_NUM_probability": 0.9999231398105621, + "eval_icons_inside_bbox": 0.1614583358168602, + "eval_icons_loss": 2.9076406955718994, + "eval_icons_loss_ce": 0.00013697430404135957, + "eval_icons_loss_iou": 1.0185546875, + "eval_icons_loss_num": 0.1790771484375, + "eval_icons_loss_xval": 2.931640625, + "eval_icons_runtime": 66.9052, + "eval_icons_samples_per_second": 0.747, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "eval_screenspot_CIoU": 0.021585943798224132, + "eval_screenspot_GIoU": 0.03971382789313793, + "eval_screenspot_IoU": 0.18650566041469574, + "eval_screenspot_MAE_all": 0.20787508289019266, + "eval_screenspot_MAE_h": 0.17082890371481577, + "eval_screenspot_MAE_w": 0.17836354921261469, + "eval_screenspot_MAE_x_boxes": 0.22417324284712473, + "eval_screenspot_MAE_y_boxes": 0.16064871350924173, + "eval_screenspot_NUM_probability": 0.9998685717582703, + "eval_screenspot_inside_bbox": 0.38333333532015484, + "eval_screenspot_loss": 3.0285556316375732, + "eval_screenspot_loss_ce": 0.023904730876286823, + "eval_screenspot_loss_iou": 0.974609375, + "eval_screenspot_loss_num": 0.21490478515625, + "eval_screenspot_loss_xval": 3.0231119791666665, + "eval_screenspot_runtime": 124.9603, + "eval_screenspot_samples_per_second": 0.712, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "eval_compot_CIoU": 0.00973070040345192, + "eval_compot_GIoU": 0.06087764725089073, + "eval_compot_IoU": 0.16261964663863182, + "eval_compot_MAE_all": 0.18344344943761826, + "eval_compot_MAE_h": 0.08078011125326157, + "eval_compot_MAE_w": 0.23833151906728745, + "eval_compot_MAE_x_boxes": 0.15341763198375702, + "eval_compot_MAE_y_boxes": 0.1260961815714836, + "eval_compot_NUM_probability": 0.9998997449874878, + "eval_compot_inside_bbox": 0.3038194477558136, + "eval_compot_loss": 2.89353609085083, + "eval_compot_loss_ce": 0.007296753115952015, + "eval_compot_loss_iou": 0.966064453125, + "eval_compot_loss_num": 0.190185546875, + "eval_compot_loss_xval": 2.8828125, + "eval_compot_runtime": 78.7073, + "eval_compot_samples_per_second": 0.635, + "eval_compot_steps_per_second": 0.025, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "eval_custom_ui_MAE_all": 0.15342652052640915, + "eval_custom_ui_MAE_x": 0.14219188317656517, + "eval_custom_ui_MAE_y": 0.16466113924980164, + "eval_custom_ui_NUM_probability": 0.9999070465564728, + "eval_custom_ui_loss": 0.8835628032684326, + "eval_custom_ui_loss_ce": 0.16172069311141968, + "eval_custom_ui_loss_num": 0.148651123046875, + "eval_custom_ui_loss_xval": 0.743408203125, + "eval_custom_ui_runtime": 53.8258, + "eval_custom_ui_samples_per_second": 0.929, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23400570973931764, + "loss": 0.8828193545341492, + "loss_ce": 0.17334669828414917, + "loss_iou": 0.0, + "loss_num": 0.1416015625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 165574880, + "step": 2500 + }, + { + "epoch": 0.23409931202321338, + "grad_norm": 24.63797378540039, + "learning_rate": 5e-05, + "loss": 1.4581, + "num_input_tokens_seen": 165640432, + "step": 2501 + }, + { + "epoch": 0.23409931202321338, + "loss": 1.4565620422363281, + "loss_ce": 0.003437077160924673, + "loss_iou": 0.5703125, + "loss_num": 0.0625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 165640432, + "step": 2501 + }, + { + "epoch": 0.2341929143071091, + "grad_norm": 39.45976638793945, + "learning_rate": 5e-05, + "loss": 1.3657, + "num_input_tokens_seen": 165707380, + "step": 2502 + }, + { + "epoch": 0.2341929143071091, + "loss": 1.3734135627746582, + "loss_ce": 0.001831514062359929, + "loss_iou": 0.5859375, + "loss_num": 0.040771484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 165707380, + "step": 2502 + }, + { + "epoch": 0.23428651659100483, + "grad_norm": 18.85283088684082, + "learning_rate": 5e-05, + "loss": 1.1919, + "num_input_tokens_seen": 165773616, + "step": 2503 + }, + { + "epoch": 0.23428651659100483, + "loss": 1.2751984596252441, + "loss_ce": 0.004690650384873152, + "loss_iou": 0.54296875, + "loss_num": 0.037353515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 165773616, + "step": 2503 + }, + { + "epoch": 0.23438011887490054, + "grad_norm": 15.156516075134277, + "learning_rate": 5e-05, + "loss": 1.2399, + "num_input_tokens_seen": 165839672, + "step": 2504 + }, + { + "epoch": 0.23438011887490054, + "loss": 1.2254211902618408, + "loss_ce": 0.00435184221714735, + "loss_iou": 0.48828125, + "loss_num": 0.048583984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 165839672, + "step": 2504 + }, + { + "epoch": 0.23447372115879628, + "grad_norm": 40.05813217163086, + "learning_rate": 5e-05, + "loss": 1.4874, + "num_input_tokens_seen": 165905700, + "step": 2505 + }, + { + "epoch": 0.23447372115879628, + "loss": 1.6928784847259521, + "loss_ce": 0.005378500558435917, + "loss_iou": 0.68359375, + "loss_num": 0.064453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 165905700, + "step": 2505 + }, + { + "epoch": 0.234567323442692, + "grad_norm": 82.37883758544922, + "learning_rate": 5e-05, + "loss": 1.8889, + "num_input_tokens_seen": 165972368, + "step": 2506 + }, + { + "epoch": 0.234567323442692, + "loss": 1.720952033996582, + "loss_ce": 0.007084723096340895, + "loss_iou": 0.71875, + "loss_num": 0.0556640625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 165972368, + "step": 2506 + }, + { + "epoch": 0.23466092572658773, + "grad_norm": 25.550870895385742, + "learning_rate": 5e-05, + "loss": 1.5326, + "num_input_tokens_seen": 166037584, + "step": 2507 + }, + { + "epoch": 0.23466092572658773, + "loss": 1.562875747680664, + "loss_ce": 0.003793750423938036, + "loss_iou": 0.61328125, + "loss_num": 0.06689453125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 166037584, + "step": 2507 + }, + { + "epoch": 0.23475452801048347, + "grad_norm": 35.482093811035156, + "learning_rate": 5e-05, + "loss": 1.3808, + "num_input_tokens_seen": 166103516, + "step": 2508 + }, + { + "epoch": 0.23475452801048347, + "loss": 1.5787138938903809, + "loss_ce": 0.005471743177622557, + "loss_iou": 0.65625, + "loss_num": 0.052734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 166103516, + "step": 2508 + }, + { + "epoch": 0.23484813029437918, + "grad_norm": 25.37134552001953, + "learning_rate": 5e-05, + "loss": 1.4643, + "num_input_tokens_seen": 166168932, + "step": 2509 + }, + { + "epoch": 0.23484813029437918, + "loss": 1.3653082847595215, + "loss_ce": 0.005933395121246576, + "loss_iou": 0.578125, + "loss_num": 0.041259765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 166168932, + "step": 2509 + }, + { + "epoch": 0.23494173257827491, + "grad_norm": 16.15148162841797, + "learning_rate": 5e-05, + "loss": 1.1893, + "num_input_tokens_seen": 166235624, + "step": 2510 + }, + { + "epoch": 0.23494173257827491, + "loss": 1.31050705909729, + "loss_ce": 0.0014250393724069, + "loss_iou": 0.54296875, + "loss_num": 0.04443359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 166235624, + "step": 2510 + }, + { + "epoch": 0.23503533486217063, + "grad_norm": 24.526351928710938, + "learning_rate": 5e-05, + "loss": 1.2833, + "num_input_tokens_seen": 166301484, + "step": 2511 + }, + { + "epoch": 0.23503533486217063, + "loss": 1.2813926935195923, + "loss_ce": 0.0016074995510280132, + "loss_iou": 0.5546875, + "loss_num": 0.0341796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 166301484, + "step": 2511 + }, + { + "epoch": 0.23512893714606636, + "grad_norm": 19.538311004638672, + "learning_rate": 5e-05, + "loss": 1.6607, + "num_input_tokens_seen": 166368404, + "step": 2512 + }, + { + "epoch": 0.23512893714606636, + "loss": 1.573952078819275, + "loss_ce": 0.006569328717887402, + "loss_iou": 0.6640625, + "loss_num": 0.047119140625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 166368404, + "step": 2512 + }, + { + "epoch": 0.2352225394299621, + "grad_norm": 53.490726470947266, + "learning_rate": 5e-05, + "loss": 1.3682, + "num_input_tokens_seen": 166432760, + "step": 2513 + }, + { + "epoch": 0.2352225394299621, + "loss": 1.5700562000274658, + "loss_ce": 0.006091253831982613, + "loss_iou": 0.64453125, + "loss_num": 0.055419921875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 166432760, + "step": 2513 + }, + { + "epoch": 0.2353161417138578, + "grad_norm": 13.956862449645996, + "learning_rate": 5e-05, + "loss": 1.4508, + "num_input_tokens_seen": 166498608, + "step": 2514 + }, + { + "epoch": 0.2353161417138578, + "loss": 1.46309232711792, + "loss_ce": 0.008014105260372162, + "loss_iou": 0.609375, + "loss_num": 0.04736328125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 166498608, + "step": 2514 + }, + { + "epoch": 0.23540974399775355, + "grad_norm": 18.782583236694336, + "learning_rate": 5e-05, + "loss": 1.3075, + "num_input_tokens_seen": 166564932, + "step": 2515 + }, + { + "epoch": 0.23540974399775355, + "loss": 1.373844027519226, + "loss_ce": 0.0027503168676048517, + "loss_iou": 0.609375, + "loss_num": 0.0311279296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 166564932, + "step": 2515 + }, + { + "epoch": 0.23550334628164926, + "grad_norm": 16.891401290893555, + "learning_rate": 5e-05, + "loss": 1.3846, + "num_input_tokens_seen": 166632072, + "step": 2516 + }, + { + "epoch": 0.23550334628164926, + "loss": 1.401080846786499, + "loss_ce": 0.0060612596571445465, + "loss_iou": 0.5625, + "loss_num": 0.05419921875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 166632072, + "step": 2516 + }, + { + "epoch": 0.235596948565545, + "grad_norm": 31.363994598388672, + "learning_rate": 5e-05, + "loss": 1.3179, + "num_input_tokens_seen": 166698428, + "step": 2517 + }, + { + "epoch": 0.235596948565545, + "loss": 1.6091729402542114, + "loss_ce": 0.008586999028921127, + "loss_iou": 0.61328125, + "loss_num": 0.0751953125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 166698428, + "step": 2517 + }, + { + "epoch": 0.23569055084944074, + "grad_norm": 22.80681610107422, + "learning_rate": 5e-05, + "loss": 1.5486, + "num_input_tokens_seen": 166764724, + "step": 2518 + }, + { + "epoch": 0.23569055084944074, + "loss": 1.6586039066314697, + "loss_ce": 0.00430699298158288, + "loss_iou": 0.65625, + "loss_num": 0.068359375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 166764724, + "step": 2518 + }, + { + "epoch": 0.23578415313333645, + "grad_norm": 23.773784637451172, + "learning_rate": 5e-05, + "loss": 1.41, + "num_input_tokens_seen": 166831072, + "step": 2519 + }, + { + "epoch": 0.23578415313333645, + "loss": 1.5527911186218262, + "loss_ce": 0.007869203574955463, + "loss_iou": 0.62890625, + "loss_num": 0.05712890625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 166831072, + "step": 2519 + }, + { + "epoch": 0.2358777554172322, + "grad_norm": 26.95271110534668, + "learning_rate": 5e-05, + "loss": 1.6262, + "num_input_tokens_seen": 166897312, + "step": 2520 + }, + { + "epoch": 0.2358777554172322, + "loss": 1.5096721649169922, + "loss_ce": 0.005765873938798904, + "loss_iou": 0.6953125, + "loss_num": 0.0234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 166897312, + "step": 2520 + }, + { + "epoch": 0.2359713577011279, + "grad_norm": 34.8698844909668, + "learning_rate": 5e-05, + "loss": 1.5628, + "num_input_tokens_seen": 166964008, + "step": 2521 + }, + { + "epoch": 0.2359713577011279, + "loss": 1.5502536296844482, + "loss_ce": 0.00386689486913383, + "loss_iou": 0.640625, + "loss_num": 0.0537109375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 166964008, + "step": 2521 + }, + { + "epoch": 0.23606495998502364, + "grad_norm": 47.92771530151367, + "learning_rate": 5e-05, + "loss": 1.5755, + "num_input_tokens_seen": 167029896, + "step": 2522 + }, + { + "epoch": 0.23606495998502364, + "loss": 1.4616825580596924, + "loss_ce": 0.004651276394724846, + "loss_iou": 0.62109375, + "loss_num": 0.042724609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 167029896, + "step": 2522 + }, + { + "epoch": 0.23615856226891938, + "grad_norm": 13.946309089660645, + "learning_rate": 5e-05, + "loss": 1.438, + "num_input_tokens_seen": 167096340, + "step": 2523 + }, + { + "epoch": 0.23615856226891938, + "loss": 1.4711437225341797, + "loss_ce": 0.0028819835279136896, + "loss_iou": 0.59375, + "loss_num": 0.056396484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 167096340, + "step": 2523 + }, + { + "epoch": 0.23625216455281509, + "grad_norm": 22.62273406982422, + "learning_rate": 5e-05, + "loss": 1.2367, + "num_input_tokens_seen": 167163156, + "step": 2524 + }, + { + "epoch": 0.23625216455281509, + "loss": 1.2503703832626343, + "loss_ce": 0.00622974056750536, + "loss_iou": 0.55078125, + "loss_num": 0.0291748046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 167163156, + "step": 2524 + }, + { + "epoch": 0.23634576683671082, + "grad_norm": 21.35040855407715, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 167229016, + "step": 2525 + }, + { + "epoch": 0.23634576683671082, + "loss": 1.3842897415161133, + "loss_ce": 0.0034303911961615086, + "loss_iou": 0.5390625, + "loss_num": 0.060791015625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 167229016, + "step": 2525 + }, + { + "epoch": 0.23643936912060654, + "grad_norm": 29.040626525878906, + "learning_rate": 5e-05, + "loss": 1.3072, + "num_input_tokens_seen": 167294660, + "step": 2526 + }, + { + "epoch": 0.23643936912060654, + "loss": 1.3200488090515137, + "loss_ce": 0.006572265177965164, + "loss_iou": 0.53125, + "loss_num": 0.049560546875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 167294660, + "step": 2526 + }, + { + "epoch": 0.23653297140450227, + "grad_norm": 36.906776428222656, + "learning_rate": 5e-05, + "loss": 1.3674, + "num_input_tokens_seen": 167361328, + "step": 2527 + }, + { + "epoch": 0.23653297140450227, + "loss": 1.4203541278839111, + "loss_ce": 0.0028736297972500324, + "loss_iou": 0.61328125, + "loss_num": 0.0380859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 167361328, + "step": 2527 + }, + { + "epoch": 0.23662657368839798, + "grad_norm": 25.944272994995117, + "learning_rate": 5e-05, + "loss": 1.4463, + "num_input_tokens_seen": 167427928, + "step": 2528 + }, + { + "epoch": 0.23662657368839798, + "loss": 1.4592499732971191, + "loss_ce": 0.0051484620198607445, + "loss_iou": 0.6015625, + "loss_num": 0.050048828125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 167427928, + "step": 2528 + }, + { + "epoch": 0.23672017597229372, + "grad_norm": 26.98100471496582, + "learning_rate": 5e-05, + "loss": 1.1391, + "num_input_tokens_seen": 167493916, + "step": 2529 + }, + { + "epoch": 0.23672017597229372, + "loss": 1.1518512964248657, + "loss_ce": 0.008784936740994453, + "loss_iou": 0.490234375, + "loss_num": 0.032470703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 167493916, + "step": 2529 + }, + { + "epoch": 0.23681377825618946, + "grad_norm": 19.34450912475586, + "learning_rate": 5e-05, + "loss": 1.4203, + "num_input_tokens_seen": 167559904, + "step": 2530 + }, + { + "epoch": 0.23681377825618946, + "loss": 1.5328006744384766, + "loss_ce": 0.01131628081202507, + "loss_iou": 0.66796875, + "loss_num": 0.037841796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 167559904, + "step": 2530 + }, + { + "epoch": 0.23690738054008517, + "grad_norm": 15.683530807495117, + "learning_rate": 5e-05, + "loss": 1.1991, + "num_input_tokens_seen": 167625640, + "step": 2531 + }, + { + "epoch": 0.23690738054008517, + "loss": 1.133538007736206, + "loss_ce": 0.006584820803254843, + "loss_iou": 0.4765625, + "loss_num": 0.03466796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 167625640, + "step": 2531 + }, + { + "epoch": 0.2370009828239809, + "grad_norm": 32.248294830322266, + "learning_rate": 5e-05, + "loss": 1.4016, + "num_input_tokens_seen": 167692004, + "step": 2532 + }, + { + "epoch": 0.2370009828239809, + "loss": 1.3093624114990234, + "loss_ce": 0.005773622542619705, + "loss_iou": 0.53515625, + "loss_num": 0.046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 167692004, + "step": 2532 + }, + { + "epoch": 0.23709458510787662, + "grad_norm": 69.06314849853516, + "learning_rate": 5e-05, + "loss": 1.3207, + "num_input_tokens_seen": 167757324, + "step": 2533 + }, + { + "epoch": 0.23709458510787662, + "loss": 1.0936673879623413, + "loss_ce": 0.006265057250857353, + "loss_iou": 0.400390625, + "loss_num": 0.056884765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 167757324, + "step": 2533 + }, + { + "epoch": 0.23718818739177236, + "grad_norm": 18.926410675048828, + "learning_rate": 5e-05, + "loss": 1.5909, + "num_input_tokens_seen": 167825068, + "step": 2534 + }, + { + "epoch": 0.23718818739177236, + "loss": 1.6822926998138428, + "loss_ce": 0.0035817273892462254, + "loss_iou": 0.65234375, + "loss_num": 0.0751953125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 167825068, + "step": 2534 + }, + { + "epoch": 0.2372817896756681, + "grad_norm": 24.251678466796875, + "learning_rate": 5e-05, + "loss": 1.3461, + "num_input_tokens_seen": 167891032, + "step": 2535 + }, + { + "epoch": 0.2372817896756681, + "loss": 1.2933571338653564, + "loss_ce": 0.008689153008162975, + "loss_iou": 0.546875, + "loss_num": 0.037353515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 167891032, + "step": 2535 + }, + { + "epoch": 0.2373753919595638, + "grad_norm": 21.610767364501953, + "learning_rate": 5e-05, + "loss": 1.4972, + "num_input_tokens_seen": 167957360, + "step": 2536 + }, + { + "epoch": 0.2373753919595638, + "loss": 1.4585134983062744, + "loss_ce": 0.005510497838258743, + "loss_iou": 0.578125, + "loss_num": 0.06005859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 167957360, + "step": 2536 + }, + { + "epoch": 0.23746899424345955, + "grad_norm": 25.770593643188477, + "learning_rate": 5e-05, + "loss": 1.4145, + "num_input_tokens_seen": 168024016, + "step": 2537 + }, + { + "epoch": 0.23746899424345955, + "loss": 1.599055290222168, + "loss_ce": 0.0033521135337650776, + "loss_iou": 0.6484375, + "loss_num": 0.059326171875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 168024016, + "step": 2537 + }, + { + "epoch": 0.23756259652735526, + "grad_norm": 26.87066078186035, + "learning_rate": 5e-05, + "loss": 1.4639, + "num_input_tokens_seen": 168090236, + "step": 2538 + }, + { + "epoch": 0.23756259652735526, + "loss": 1.3521130084991455, + "loss_ce": 0.004456783644855022, + "loss_iou": 0.5859375, + "loss_num": 0.035400390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 168090236, + "step": 2538 + }, + { + "epoch": 0.237656198811251, + "grad_norm": 33.97237014770508, + "learning_rate": 5e-05, + "loss": 1.4855, + "num_input_tokens_seen": 168156936, + "step": 2539 + }, + { + "epoch": 0.237656198811251, + "loss": 1.5603784322738647, + "loss_ce": 0.007644066587090492, + "loss_iou": 0.6328125, + "loss_num": 0.057861328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 168156936, + "step": 2539 + }, + { + "epoch": 0.23774980109514673, + "grad_norm": 42.24701690673828, + "learning_rate": 5e-05, + "loss": 1.2734, + "num_input_tokens_seen": 168221500, + "step": 2540 + }, + { + "epoch": 0.23774980109514673, + "loss": 1.040938138961792, + "loss_ce": 0.007857018150389194, + "loss_iou": 0.439453125, + "loss_num": 0.030517578125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 168221500, + "step": 2540 + }, + { + "epoch": 0.23784340337904245, + "grad_norm": 31.573814392089844, + "learning_rate": 5e-05, + "loss": 1.4149, + "num_input_tokens_seen": 168287724, + "step": 2541 + }, + { + "epoch": 0.23784340337904245, + "loss": 1.4590835571289062, + "loss_ce": 0.006935225334018469, + "loss_iou": 0.609375, + "loss_num": 0.0458984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 168287724, + "step": 2541 + }, + { + "epoch": 0.23793700566293818, + "grad_norm": 21.765670776367188, + "learning_rate": 5e-05, + "loss": 1.7283, + "num_input_tokens_seen": 168354320, + "step": 2542 + }, + { + "epoch": 0.23793700566293818, + "loss": 1.7900198698043823, + "loss_ce": 0.008769959211349487, + "loss_iou": 0.70703125, + "loss_num": 0.0732421875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 168354320, + "step": 2542 + }, + { + "epoch": 0.2380306079468339, + "grad_norm": 14.011946678161621, + "learning_rate": 5e-05, + "loss": 1.3828, + "num_input_tokens_seen": 168420804, + "step": 2543 + }, + { + "epoch": 0.2380306079468339, + "loss": 1.226111650466919, + "loss_ce": 0.0019905937369912863, + "loss_iou": 0.498046875, + "loss_num": 0.045654296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 168420804, + "step": 2543 + }, + { + "epoch": 0.23812421023072963, + "grad_norm": 16.307533264160156, + "learning_rate": 5e-05, + "loss": 1.0762, + "num_input_tokens_seen": 168486652, + "step": 2544 + }, + { + "epoch": 0.23812421023072963, + "loss": 1.153876543045044, + "loss_ce": 0.0034858197905123234, + "loss_iou": 0.50390625, + "loss_num": 0.0284423828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 168486652, + "step": 2544 + }, + { + "epoch": 0.23821781251462534, + "grad_norm": 42.976375579833984, + "learning_rate": 5e-05, + "loss": 1.509, + "num_input_tokens_seen": 168553540, + "step": 2545 + }, + { + "epoch": 0.23821781251462534, + "loss": 1.487226128578186, + "loss_ce": 0.006269030272960663, + "loss_iou": 0.58984375, + "loss_num": 0.060302734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 168553540, + "step": 2545 + }, + { + "epoch": 0.23831141479852108, + "grad_norm": 24.15453338623047, + "learning_rate": 5e-05, + "loss": 1.6722, + "num_input_tokens_seen": 168620332, + "step": 2546 + }, + { + "epoch": 0.23831141479852108, + "loss": 1.7856779098510742, + "loss_ce": 0.007357646245509386, + "loss_iou": 0.7578125, + "loss_num": 0.052978515625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 168620332, + "step": 2546 + }, + { + "epoch": 0.23840501708241682, + "grad_norm": 27.00608253479004, + "learning_rate": 5e-05, + "loss": 1.299, + "num_input_tokens_seen": 168686700, + "step": 2547 + }, + { + "epoch": 0.23840501708241682, + "loss": 1.1916871070861816, + "loss_ce": 0.0071167671121656895, + "loss_iou": 0.4453125, + "loss_num": 0.05859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 168686700, + "step": 2547 + }, + { + "epoch": 0.23849861936631253, + "grad_norm": 33.46535873413086, + "learning_rate": 5e-05, + "loss": 1.5463, + "num_input_tokens_seen": 168752716, + "step": 2548 + }, + { + "epoch": 0.23849861936631253, + "loss": 1.7102900743484497, + "loss_ce": 0.007165055721998215, + "loss_iou": 0.75390625, + "loss_num": 0.039794921875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 168752716, + "step": 2548 + }, + { + "epoch": 0.23859222165020827, + "grad_norm": 25.385046005249023, + "learning_rate": 5e-05, + "loss": 1.7107, + "num_input_tokens_seen": 168819832, + "step": 2549 + }, + { + "epoch": 0.23859222165020827, + "loss": 1.8767709732055664, + "loss_ce": 0.004700660705566406, + "loss_iou": 0.80859375, + "loss_num": 0.051025390625, + "loss_xval": 1.875, + "num_input_tokens_seen": 168819832, + "step": 2549 + }, + { + "epoch": 0.23868582393410398, + "grad_norm": 29.79375457763672, + "learning_rate": 5e-05, + "loss": 1.3082, + "num_input_tokens_seen": 168885716, + "step": 2550 + }, + { + "epoch": 0.23868582393410398, + "loss": 1.150732159614563, + "loss_ce": 0.0037594810128211975, + "loss_iou": 0.494140625, + "loss_num": 0.0322265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 168885716, + "step": 2550 + }, + { + "epoch": 0.23877942621799972, + "grad_norm": 19.679378509521484, + "learning_rate": 5e-05, + "loss": 1.5818, + "num_input_tokens_seen": 168950856, + "step": 2551 + }, + { + "epoch": 0.23877942621799972, + "loss": 1.5313818454742432, + "loss_ce": 0.009897556155920029, + "loss_iou": 0.54296875, + "loss_num": 0.0869140625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 168950856, + "step": 2551 + }, + { + "epoch": 0.23887302850189546, + "grad_norm": 35.51392364501953, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 169017284, + "step": 2552 + }, + { + "epoch": 0.23887302850189546, + "loss": 1.4114516973495483, + "loss_ce": 0.004225119948387146, + "loss_iou": 0.58203125, + "loss_num": 0.04931640625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 169017284, + "step": 2552 + }, + { + "epoch": 0.23896663078579117, + "grad_norm": 17.957441329956055, + "learning_rate": 5e-05, + "loss": 1.6443, + "num_input_tokens_seen": 169083700, + "step": 2553 + }, + { + "epoch": 0.23896663078579117, + "loss": 1.655320167541504, + "loss_ce": 0.010788802057504654, + "loss_iou": 0.6640625, + "loss_num": 0.0634765625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 169083700, + "step": 2553 + }, + { + "epoch": 0.2390602330696869, + "grad_norm": 25.54216194152832, + "learning_rate": 5e-05, + "loss": 1.5068, + "num_input_tokens_seen": 169150616, + "step": 2554 + }, + { + "epoch": 0.2390602330696869, + "loss": 1.3988134860992432, + "loss_ce": 0.009165081195533276, + "loss_iou": 0.5859375, + "loss_num": 0.04345703125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 169150616, + "step": 2554 + }, + { + "epoch": 0.23915383535358262, + "grad_norm": 22.393592834472656, + "learning_rate": 5e-05, + "loss": 1.4269, + "num_input_tokens_seen": 169217596, + "step": 2555 + }, + { + "epoch": 0.23915383535358262, + "loss": 1.2467050552368164, + "loss_ce": 0.0064705777913331985, + "loss_iou": 0.50390625, + "loss_num": 0.0458984375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 169217596, + "step": 2555 + }, + { + "epoch": 0.23924743763747836, + "grad_norm": 22.189350128173828, + "learning_rate": 5e-05, + "loss": 1.6242, + "num_input_tokens_seen": 169283752, + "step": 2556 + }, + { + "epoch": 0.23924743763747836, + "loss": 1.204790711402893, + "loss_ce": 0.003618875052779913, + "loss_iou": 0.50390625, + "loss_num": 0.0390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 169283752, + "step": 2556 + }, + { + "epoch": 0.2393410399213741, + "grad_norm": 18.53838348388672, + "learning_rate": 5e-05, + "loss": 1.3443, + "num_input_tokens_seen": 169351472, + "step": 2557 + }, + { + "epoch": 0.2393410399213741, + "loss": 1.192333698272705, + "loss_ce": 0.004833739250898361, + "loss_iou": 0.5078125, + "loss_num": 0.0341796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 169351472, + "step": 2557 + }, + { + "epoch": 0.2394346422052698, + "grad_norm": 36.56483840942383, + "learning_rate": 5e-05, + "loss": 1.4114, + "num_input_tokens_seen": 169417264, + "step": 2558 + }, + { + "epoch": 0.2394346422052698, + "loss": 1.2931647300720215, + "loss_ce": 0.0021491688676178455, + "loss_iou": 0.5546875, + "loss_num": 0.035888671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 169417264, + "step": 2558 + }, + { + "epoch": 0.23952824448916554, + "grad_norm": 28.568727493286133, + "learning_rate": 5e-05, + "loss": 1.7392, + "num_input_tokens_seen": 169482812, + "step": 2559 + }, + { + "epoch": 0.23952824448916554, + "loss": 1.7891249656677246, + "loss_ce": 0.0068985046818852425, + "loss_iou": 0.734375, + "loss_num": 0.06298828125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 169482812, + "step": 2559 + }, + { + "epoch": 0.23962184677306125, + "grad_norm": 14.091453552246094, + "learning_rate": 5e-05, + "loss": 1.4067, + "num_input_tokens_seen": 169548612, + "step": 2560 + }, + { + "epoch": 0.23962184677306125, + "loss": 1.3772664070129395, + "loss_ce": 0.007149163633584976, + "loss_iou": 0.5390625, + "loss_num": 0.05859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 169548612, + "step": 2560 + }, + { + "epoch": 0.239715449056957, + "grad_norm": 19.679611206054688, + "learning_rate": 5e-05, + "loss": 1.453, + "num_input_tokens_seen": 169614736, + "step": 2561 + }, + { + "epoch": 0.239715449056957, + "loss": 1.468279242515564, + "loss_ce": 0.008318359032273293, + "loss_iou": 0.578125, + "loss_num": 0.060791015625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 169614736, + "step": 2561 + }, + { + "epoch": 0.23980905134085273, + "grad_norm": 23.91083335876465, + "learning_rate": 5e-05, + "loss": 1.1501, + "num_input_tokens_seen": 169680272, + "step": 2562 + }, + { + "epoch": 0.23980905134085273, + "loss": 1.3164702653884888, + "loss_ce": 0.006411626935005188, + "loss_iou": 0.5546875, + "loss_num": 0.040771484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 169680272, + "step": 2562 + }, + { + "epoch": 0.23990265362474844, + "grad_norm": 27.89542007446289, + "learning_rate": 5e-05, + "loss": 1.3645, + "num_input_tokens_seen": 169745792, + "step": 2563 + }, + { + "epoch": 0.23990265362474844, + "loss": 1.1841665506362915, + "loss_ce": 0.00862942449748516, + "loss_iou": 0.45703125, + "loss_num": 0.05224609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 169745792, + "step": 2563 + }, + { + "epoch": 0.23999625590864418, + "grad_norm": 21.266613006591797, + "learning_rate": 5e-05, + "loss": 1.4262, + "num_input_tokens_seen": 169812188, + "step": 2564 + }, + { + "epoch": 0.23999625590864418, + "loss": 1.3133118152618408, + "loss_ce": 0.010089192539453506, + "loss_iou": 0.5546875, + "loss_num": 0.03857421875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 169812188, + "step": 2564 + }, + { + "epoch": 0.2400898581925399, + "grad_norm": 21.182580947875977, + "learning_rate": 5e-05, + "loss": 1.4142, + "num_input_tokens_seen": 169877916, + "step": 2565 + }, + { + "epoch": 0.2400898581925399, + "loss": 1.5413999557495117, + "loss_ce": 0.0062437597662210464, + "loss_iou": 0.62109375, + "loss_num": 0.057861328125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 169877916, + "step": 2565 + }, + { + "epoch": 0.24018346047643563, + "grad_norm": 52.13093185424805, + "learning_rate": 5e-05, + "loss": 1.5151, + "num_input_tokens_seen": 169944320, + "step": 2566 + }, + { + "epoch": 0.24018346047643563, + "loss": 1.5705912113189697, + "loss_ce": 0.010044336318969727, + "loss_iou": 0.65625, + "loss_num": 0.049072265625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 169944320, + "step": 2566 + }, + { + "epoch": 0.24027706276033134, + "grad_norm": 23.22975730895996, + "learning_rate": 5e-05, + "loss": 1.3332, + "num_input_tokens_seen": 170011128, + "step": 2567 + }, + { + "epoch": 0.24027706276033134, + "loss": 1.5615291595458984, + "loss_ce": 0.007818352431058884, + "loss_iou": 0.6484375, + "loss_num": 0.05224609375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 170011128, + "step": 2567 + }, + { + "epoch": 0.24037066504422708, + "grad_norm": 25.49972152709961, + "learning_rate": 5e-05, + "loss": 1.3914, + "num_input_tokens_seen": 170077496, + "step": 2568 + }, + { + "epoch": 0.24037066504422708, + "loss": 1.5926051139831543, + "loss_ce": 0.007644252851605415, + "loss_iou": 0.6171875, + "loss_num": 0.06982421875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 170077496, + "step": 2568 + }, + { + "epoch": 0.24046426732812282, + "grad_norm": 40.0810546875, + "learning_rate": 5e-05, + "loss": 1.5399, + "num_input_tokens_seen": 170144536, + "step": 2569 + }, + { + "epoch": 0.24046426732812282, + "loss": 1.6190154552459717, + "loss_ce": 0.0047575682401657104, + "loss_iou": 0.6953125, + "loss_num": 0.045166015625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 170144536, + "step": 2569 + }, + { + "epoch": 0.24055786961201853, + "grad_norm": 88.32617950439453, + "learning_rate": 5e-05, + "loss": 1.5308, + "num_input_tokens_seen": 170210088, + "step": 2570 + }, + { + "epoch": 0.24055786961201853, + "loss": 1.5420403480529785, + "loss_ce": 0.006395814009010792, + "loss_iou": 0.58203125, + "loss_num": 0.07373046875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 170210088, + "step": 2570 + }, + { + "epoch": 0.24065147189591427, + "grad_norm": 21.787992477416992, + "learning_rate": 5e-05, + "loss": 1.495, + "num_input_tokens_seen": 170276552, + "step": 2571 + }, + { + "epoch": 0.24065147189591427, + "loss": 1.5120552778244019, + "loss_ce": 0.004242782015353441, + "loss_iou": 0.65625, + "loss_num": 0.03857421875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 170276552, + "step": 2571 + }, + { + "epoch": 0.24074507417980998, + "grad_norm": 11.673762321472168, + "learning_rate": 5e-05, + "loss": 1.3003, + "num_input_tokens_seen": 170342504, + "step": 2572 + }, + { + "epoch": 0.24074507417980998, + "loss": 1.2681864500045776, + "loss_ce": 0.0025614858604967594, + "loss_iou": 0.4921875, + "loss_num": 0.056396484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 170342504, + "step": 2572 + }, + { + "epoch": 0.24083867646370571, + "grad_norm": 25.950706481933594, + "learning_rate": 5e-05, + "loss": 1.3317, + "num_input_tokens_seen": 170408564, + "step": 2573 + }, + { + "epoch": 0.24083867646370571, + "loss": 1.2869504690170288, + "loss_ce": 0.006188774481415749, + "loss_iou": 0.53515625, + "loss_num": 0.041015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 170408564, + "step": 2573 + }, + { + "epoch": 0.24093227874760145, + "grad_norm": 27.499576568603516, + "learning_rate": 5e-05, + "loss": 1.2999, + "num_input_tokens_seen": 170473744, + "step": 2574 + }, + { + "epoch": 0.24093227874760145, + "loss": 1.6399996280670166, + "loss_ce": 0.005234008654952049, + "loss_iou": 0.66015625, + "loss_num": 0.062255859375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 170473744, + "step": 2574 + }, + { + "epoch": 0.24102588103149716, + "grad_norm": 20.279264450073242, + "learning_rate": 5e-05, + "loss": 1.614, + "num_input_tokens_seen": 170540660, + "step": 2575 + }, + { + "epoch": 0.24102588103149716, + "loss": 1.6746065616607666, + "loss_ce": 0.0037080540787428617, + "loss_iou": 0.66796875, + "loss_num": 0.06689453125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 170540660, + "step": 2575 + }, + { + "epoch": 0.2411194833153929, + "grad_norm": 12.744171142578125, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 170607608, + "step": 2576 + }, + { + "epoch": 0.2411194833153929, + "loss": 1.1031405925750732, + "loss_ce": 0.004996098577976227, + "loss_iou": 0.46484375, + "loss_num": 0.0341796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 170607608, + "step": 2576 + }, + { + "epoch": 0.2412130855992886, + "grad_norm": 19.449844360351562, + "learning_rate": 5e-05, + "loss": 1.3246, + "num_input_tokens_seen": 170674336, + "step": 2577 + }, + { + "epoch": 0.2412130855992886, + "loss": 1.2694692611694336, + "loss_ce": 0.008727147243916988, + "loss_iou": 0.51953125, + "loss_num": 0.0439453125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 170674336, + "step": 2577 + }, + { + "epoch": 0.24130668788318435, + "grad_norm": 19.487255096435547, + "learning_rate": 5e-05, + "loss": 1.522, + "num_input_tokens_seen": 170741276, + "step": 2578 + }, + { + "epoch": 0.24130668788318435, + "loss": 1.6049238443374634, + "loss_ce": 0.006291025318205357, + "loss_iou": 0.66796875, + "loss_num": 0.05322265625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 170741276, + "step": 2578 + }, + { + "epoch": 0.2414002901670801, + "grad_norm": 30.820302963256836, + "learning_rate": 5e-05, + "loss": 1.2787, + "num_input_tokens_seen": 170807096, + "step": 2579 + }, + { + "epoch": 0.2414002901670801, + "loss": 1.2158865928649902, + "loss_ce": 0.007878745906054974, + "loss_iou": 0.53515625, + "loss_num": 0.0269775390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 170807096, + "step": 2579 + }, + { + "epoch": 0.2414938924509758, + "grad_norm": 28.080768585205078, + "learning_rate": 5e-05, + "loss": 1.1996, + "num_input_tokens_seen": 170872096, + "step": 2580 + }, + { + "epoch": 0.2414938924509758, + "loss": 1.2678003311157227, + "loss_ce": 0.003395965788513422, + "loss_iou": 0.4921875, + "loss_num": 0.055908203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 170872096, + "step": 2580 + }, + { + "epoch": 0.24158749473487154, + "grad_norm": 35.67443084716797, + "learning_rate": 5e-05, + "loss": 1.4635, + "num_input_tokens_seen": 170937528, + "step": 2581 + }, + { + "epoch": 0.24158749473487154, + "loss": 1.3514882326126099, + "loss_ce": 0.006136074662208557, + "loss_iou": 0.5390625, + "loss_num": 0.053955078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 170937528, + "step": 2581 + }, + { + "epoch": 0.24168109701876725, + "grad_norm": 23.175838470458984, + "learning_rate": 5e-05, + "loss": 1.2044, + "num_input_tokens_seen": 171003588, + "step": 2582 + }, + { + "epoch": 0.24168109701876725, + "loss": 1.240459680557251, + "loss_ce": 0.0031549385748803616, + "loss_iou": 0.54296875, + "loss_num": 0.030029296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 171003588, + "step": 2582 + }, + { + "epoch": 0.241774699302663, + "grad_norm": 36.16810989379883, + "learning_rate": 5e-05, + "loss": 1.6477, + "num_input_tokens_seen": 171069960, + "step": 2583 + }, + { + "epoch": 0.241774699302663, + "loss": 1.805147409439087, + "loss_ce": 0.003389598336070776, + "loss_iou": 0.75, + "loss_num": 0.06103515625, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 171069960, + "step": 2583 + }, + { + "epoch": 0.2418683015865587, + "grad_norm": 20.256290435791016, + "learning_rate": 5e-05, + "loss": 1.6536, + "num_input_tokens_seen": 171136008, + "step": 2584 + }, + { + "epoch": 0.2418683015865587, + "loss": 1.4925332069396973, + "loss_ce": 0.0032754617277532816, + "loss_iou": 0.625, + "loss_num": 0.0478515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 171136008, + "step": 2584 + }, + { + "epoch": 0.24196190387045444, + "grad_norm": 26.217763900756836, + "learning_rate": 5e-05, + "loss": 1.3569, + "num_input_tokens_seen": 171202648, + "step": 2585 + }, + { + "epoch": 0.24196190387045444, + "loss": 1.381807565689087, + "loss_ce": 0.0033896476961672306, + "loss_iou": 0.546875, + "loss_num": 0.056884765625, + "loss_xval": 1.375, + "num_input_tokens_seen": 171202648, + "step": 2585 + }, + { + "epoch": 0.24205550615435018, + "grad_norm": 36.30076599121094, + "learning_rate": 5e-05, + "loss": 1.4802, + "num_input_tokens_seen": 171268772, + "step": 2586 + }, + { + "epoch": 0.24205550615435018, + "loss": 1.5037238597869873, + "loss_ce": 0.003235521959140897, + "loss_iou": 0.63671875, + "loss_num": 0.04541015625, + "loss_xval": 1.5, + "num_input_tokens_seen": 171268772, + "step": 2586 + }, + { + "epoch": 0.24214910843824589, + "grad_norm": 23.128686904907227, + "learning_rate": 5e-05, + "loss": 1.7105, + "num_input_tokens_seen": 171335572, + "step": 2587 + }, + { + "epoch": 0.24214910843824589, + "loss": 1.7145795822143555, + "loss_ce": 0.006571783684194088, + "loss_iou": 0.68359375, + "loss_num": 0.06884765625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 171335572, + "step": 2587 + }, + { + "epoch": 0.24224271072214162, + "grad_norm": 18.27375030517578, + "learning_rate": 5e-05, + "loss": 1.1838, + "num_input_tokens_seen": 171402616, + "step": 2588 + }, + { + "epoch": 0.24224271072214162, + "loss": 1.2350122928619385, + "loss_ce": 0.003078640438616276, + "loss_iou": 0.546875, + "loss_num": 0.02783203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 171402616, + "step": 2588 + }, + { + "epoch": 0.24233631300603733, + "grad_norm": 22.334333419799805, + "learning_rate": 5e-05, + "loss": 1.5672, + "num_input_tokens_seen": 171468692, + "step": 2589 + }, + { + "epoch": 0.24233631300603733, + "loss": 1.8665173053741455, + "loss_ce": 0.006165698170661926, + "loss_iou": 0.6953125, + "loss_num": 0.09326171875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 171468692, + "step": 2589 + }, + { + "epoch": 0.24242991528993307, + "grad_norm": 27.002775192260742, + "learning_rate": 5e-05, + "loss": 1.409, + "num_input_tokens_seen": 171534336, + "step": 2590 + }, + { + "epoch": 0.24242991528993307, + "loss": 1.4501802921295166, + "loss_ce": 0.009262454695999622, + "loss_iou": 0.5625, + "loss_num": 0.0634765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 171534336, + "step": 2590 + }, + { + "epoch": 0.2425235175738288, + "grad_norm": 33.853355407714844, + "learning_rate": 5e-05, + "loss": 1.4523, + "num_input_tokens_seen": 171600636, + "step": 2591 + }, + { + "epoch": 0.2425235175738288, + "loss": 1.5113219022750854, + "loss_ce": 0.006439114920794964, + "loss_iou": 0.6328125, + "loss_num": 0.04833984375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 171600636, + "step": 2591 + }, + { + "epoch": 0.24261711985772452, + "grad_norm": 22.62224769592285, + "learning_rate": 5e-05, + "loss": 1.4184, + "num_input_tokens_seen": 171666148, + "step": 2592 + }, + { + "epoch": 0.24261711985772452, + "loss": 1.409839391708374, + "loss_ce": 0.004566041752696037, + "loss_iou": 0.60546875, + "loss_num": 0.0390625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 171666148, + "step": 2592 + }, + { + "epoch": 0.24271072214162026, + "grad_norm": 20.081279754638672, + "learning_rate": 5e-05, + "loss": 1.0565, + "num_input_tokens_seen": 171731468, + "step": 2593 + }, + { + "epoch": 0.24271072214162026, + "loss": 1.211484670639038, + "loss_ce": 0.005918317008763552, + "loss_iou": 0.51171875, + "loss_num": 0.036865234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 171731468, + "step": 2593 + }, + { + "epoch": 0.24280432442551597, + "grad_norm": 24.427959442138672, + "learning_rate": 5e-05, + "loss": 1.3446, + "num_input_tokens_seen": 171797680, + "step": 2594 + }, + { + "epoch": 0.24280432442551597, + "loss": 1.4312868118286133, + "loss_ce": 0.00550546171143651, + "loss_iou": 0.578125, + "loss_num": 0.05419921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 171797680, + "step": 2594 + }, + { + "epoch": 0.2428979267094117, + "grad_norm": 24.9825439453125, + "learning_rate": 5e-05, + "loss": 1.4902, + "num_input_tokens_seen": 171865792, + "step": 2595 + }, + { + "epoch": 0.2428979267094117, + "loss": 1.6268292665481567, + "loss_ce": 0.0018292388413101435, + "loss_iou": 0.65625, + "loss_num": 0.062255859375, + "loss_xval": 1.625, + "num_input_tokens_seen": 171865792, + "step": 2595 + }, + { + "epoch": 0.24299152899330745, + "grad_norm": 65.03515625, + "learning_rate": 5e-05, + "loss": 1.6393, + "num_input_tokens_seen": 171932384, + "step": 2596 + }, + { + "epoch": 0.24299152899330745, + "loss": 1.5383191108703613, + "loss_ce": 0.005604333709925413, + "loss_iou": 0.62890625, + "loss_num": 0.05419921875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 171932384, + "step": 2596 + }, + { + "epoch": 0.24308513127720316, + "grad_norm": 31.7029972076416, + "learning_rate": 5e-05, + "loss": 1.1096, + "num_input_tokens_seen": 171998412, + "step": 2597 + }, + { + "epoch": 0.24308513127720316, + "loss": 1.1933958530426025, + "loss_ce": 0.004919266793876886, + "loss_iou": 0.49609375, + "loss_num": 0.0390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 171998412, + "step": 2597 + }, + { + "epoch": 0.2431787335610989, + "grad_norm": 21.96271324157715, + "learning_rate": 5e-05, + "loss": 1.4555, + "num_input_tokens_seen": 172064900, + "step": 2598 + }, + { + "epoch": 0.2431787335610989, + "loss": 1.7012999057769775, + "loss_ce": 0.0030576358549296856, + "loss_iou": 0.7265625, + "loss_num": 0.049072265625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 172064900, + "step": 2598 + }, + { + "epoch": 0.2432723358449946, + "grad_norm": 35.74529266357422, + "learning_rate": 5e-05, + "loss": 1.3992, + "num_input_tokens_seen": 172131392, + "step": 2599 + }, + { + "epoch": 0.2432723358449946, + "loss": 1.285733938217163, + "loss_ce": 0.009122655726969242, + "loss_iou": 0.490234375, + "loss_num": 0.05908203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 172131392, + "step": 2599 + }, + { + "epoch": 0.24336593812889035, + "grad_norm": 23.304189682006836, + "learning_rate": 5e-05, + "loss": 1.8166, + "num_input_tokens_seen": 172198288, + "step": 2600 + }, + { + "epoch": 0.24336593812889035, + "loss": 2.132443904876709, + "loss_ce": 0.0064674364402890205, + "loss_iou": 0.80859375, + "loss_num": 0.1015625, + "loss_xval": 2.125, + "num_input_tokens_seen": 172198288, + "step": 2600 + }, + { + "epoch": 0.24345954041278609, + "grad_norm": 10.42893123626709, + "learning_rate": 5e-05, + "loss": 1.513, + "num_input_tokens_seen": 172264740, + "step": 2601 + }, + { + "epoch": 0.24345954041278609, + "loss": 1.3585100173950195, + "loss_ce": 0.004017863888293505, + "loss_iou": 0.53125, + "loss_num": 0.05859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 172264740, + "step": 2601 + }, + { + "epoch": 0.2435531426966818, + "grad_norm": 21.951444625854492, + "learning_rate": 5e-05, + "loss": 1.2679, + "num_input_tokens_seen": 172330220, + "step": 2602 + }, + { + "epoch": 0.2435531426966818, + "loss": 1.268410086631775, + "loss_ce": 0.002296827267855406, + "loss_iou": 0.51171875, + "loss_num": 0.048828125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 172330220, + "step": 2602 + }, + { + "epoch": 0.24364674498057753, + "grad_norm": 27.78486442565918, + "learning_rate": 5e-05, + "loss": 1.6186, + "num_input_tokens_seen": 172397216, + "step": 2603 + }, + { + "epoch": 0.24364674498057753, + "loss": 1.6075851917266846, + "loss_ce": 0.0011398645583540201, + "loss_iou": 0.6484375, + "loss_num": 0.061767578125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 172397216, + "step": 2603 + }, + { + "epoch": 0.24374034726447324, + "grad_norm": 23.510292053222656, + "learning_rate": 5e-05, + "loss": 1.1218, + "num_input_tokens_seen": 172464464, + "step": 2604 + }, + { + "epoch": 0.24374034726447324, + "loss": 1.1526579856872559, + "loss_ce": 0.006173687055706978, + "loss_iou": 0.484375, + "loss_num": 0.03564453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 172464464, + "step": 2604 + }, + { + "epoch": 0.24383394954836898, + "grad_norm": 29.873241424560547, + "learning_rate": 5e-05, + "loss": 1.663, + "num_input_tokens_seen": 172530968, + "step": 2605 + }, + { + "epoch": 0.24383394954836898, + "loss": 1.8581818342208862, + "loss_ce": 0.0080841314047575, + "loss_iou": 0.7265625, + "loss_num": 0.08056640625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 172530968, + "step": 2605 + }, + { + "epoch": 0.2439275518322647, + "grad_norm": 27.9725399017334, + "learning_rate": 5e-05, + "loss": 1.344, + "num_input_tokens_seen": 172597692, + "step": 2606 + }, + { + "epoch": 0.2439275518322647, + "loss": 1.5610685348510742, + "loss_ce": 0.006869280710816383, + "loss_iou": 0.6328125, + "loss_num": 0.0576171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 172597692, + "step": 2606 + }, + { + "epoch": 0.24402115411616043, + "grad_norm": 20.911762237548828, + "learning_rate": 5e-05, + "loss": 1.3184, + "num_input_tokens_seen": 172663132, + "step": 2607 + }, + { + "epoch": 0.24402115411616043, + "loss": 1.3391375541687012, + "loss_ce": 0.002467667916789651, + "loss_iou": 0.57421875, + "loss_num": 0.037841796875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 172663132, + "step": 2607 + }, + { + "epoch": 0.24411475640005617, + "grad_norm": 17.5421085357666, + "learning_rate": 5e-05, + "loss": 1.5946, + "num_input_tokens_seen": 172730444, + "step": 2608 + }, + { + "epoch": 0.24411475640005617, + "loss": 1.7361726760864258, + "loss_ce": 0.006192308850586414, + "loss_iou": 0.6796875, + "loss_num": 0.07421875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 172730444, + "step": 2608 + }, + { + "epoch": 0.24420835868395188, + "grad_norm": 19.712034225463867, + "learning_rate": 5e-05, + "loss": 1.4634, + "num_input_tokens_seen": 172796284, + "step": 2609 + }, + { + "epoch": 0.24420835868395188, + "loss": 1.4511772394180298, + "loss_ce": 0.008306168019771576, + "loss_iou": 0.58203125, + "loss_num": 0.05517578125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 172796284, + "step": 2609 + }, + { + "epoch": 0.24430196096784762, + "grad_norm": 31.43054962158203, + "learning_rate": 5e-05, + "loss": 1.6605, + "num_input_tokens_seen": 172862912, + "step": 2610 + }, + { + "epoch": 0.24430196096784762, + "loss": 1.5833003520965576, + "loss_ce": 0.007128553930670023, + "loss_iou": 0.6484375, + "loss_num": 0.05615234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 172862912, + "step": 2610 + }, + { + "epoch": 0.24439556325174333, + "grad_norm": 52.54152297973633, + "learning_rate": 5e-05, + "loss": 1.7262, + "num_input_tokens_seen": 172930156, + "step": 2611 + }, + { + "epoch": 0.24439556325174333, + "loss": 1.826629638671875, + "loss_ce": 0.0024108190555125475, + "loss_iou": 0.796875, + "loss_num": 0.046142578125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 172930156, + "step": 2611 + }, + { + "epoch": 0.24448916553563907, + "grad_norm": 15.385767936706543, + "learning_rate": 5e-05, + "loss": 1.3084, + "num_input_tokens_seen": 172996928, + "step": 2612 + }, + { + "epoch": 0.24448916553563907, + "loss": 1.4285422563552856, + "loss_ce": 0.0027609597891569138, + "loss_iou": 0.59765625, + "loss_num": 0.04638671875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 172996928, + "step": 2612 + }, + { + "epoch": 0.2445827678195348, + "grad_norm": 17.045854568481445, + "learning_rate": 5e-05, + "loss": 1.0687, + "num_input_tokens_seen": 173062400, + "step": 2613 + }, + { + "epoch": 0.2445827678195348, + "loss": 0.8779564499855042, + "loss_ce": 0.0014916412765160203, + "loss_iou": 0.376953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.875, + "num_input_tokens_seen": 173062400, + "step": 2613 + }, + { + "epoch": 0.24467637010343052, + "grad_norm": 24.084728240966797, + "learning_rate": 5e-05, + "loss": 1.4739, + "num_input_tokens_seen": 173128656, + "step": 2614 + }, + { + "epoch": 0.24467637010343052, + "loss": 1.4581732749938965, + "loss_ce": 0.007978045381605625, + "loss_iou": 0.58203125, + "loss_num": 0.05810546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 173128656, + "step": 2614 + }, + { + "epoch": 0.24476997238732626, + "grad_norm": 19.442224502563477, + "learning_rate": 5e-05, + "loss": 1.5273, + "num_input_tokens_seen": 173195136, + "step": 2615 + }, + { + "epoch": 0.24476997238732626, + "loss": 1.490162968635559, + "loss_ce": 0.004811372607946396, + "loss_iou": 0.578125, + "loss_num": 0.06689453125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 173195136, + "step": 2615 + }, + { + "epoch": 0.24486357467122197, + "grad_norm": 17.347576141357422, + "learning_rate": 5e-05, + "loss": 1.2705, + "num_input_tokens_seen": 173262156, + "step": 2616 + }, + { + "epoch": 0.24486357467122197, + "loss": 1.2691221237182617, + "loss_ce": 0.006426775828003883, + "loss_iou": 0.55859375, + "loss_num": 0.029296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 173262156, + "step": 2616 + }, + { + "epoch": 0.2449571769551177, + "grad_norm": 25.86452293395996, + "learning_rate": 5e-05, + "loss": 1.3638, + "num_input_tokens_seen": 173329596, + "step": 2617 + }, + { + "epoch": 0.2449571769551177, + "loss": 1.4407048225402832, + "loss_ce": 0.008087588474154472, + "loss_iou": 0.62109375, + "loss_num": 0.0380859375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 173329596, + "step": 2617 + }, + { + "epoch": 0.24505077923901344, + "grad_norm": 22.31635856628418, + "learning_rate": 5e-05, + "loss": 1.4014, + "num_input_tokens_seen": 173394924, + "step": 2618 + }, + { + "epoch": 0.24505077923901344, + "loss": 1.4378025531768799, + "loss_ce": 0.00616195984184742, + "loss_iou": 0.58984375, + "loss_num": 0.05029296875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 173394924, + "step": 2618 + }, + { + "epoch": 0.24514438152290915, + "grad_norm": 18.50616455078125, + "learning_rate": 5e-05, + "loss": 1.5329, + "num_input_tokens_seen": 173461368, + "step": 2619 + }, + { + "epoch": 0.24514438152290915, + "loss": 1.4879770278930664, + "loss_ce": 0.0036019599065184593, + "loss_iou": 0.63671875, + "loss_num": 0.042236328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 173461368, + "step": 2619 + }, + { + "epoch": 0.2452379838068049, + "grad_norm": 54.79182815551758, + "learning_rate": 5e-05, + "loss": 1.1385, + "num_input_tokens_seen": 173528312, + "step": 2620 + }, + { + "epoch": 0.2452379838068049, + "loss": 1.1173272132873535, + "loss_ce": 0.006487318314611912, + "loss_iou": 0.458984375, + "loss_num": 0.0380859375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 173528312, + "step": 2620 + }, + { + "epoch": 0.2453315860907006, + "grad_norm": 27.31485366821289, + "learning_rate": 5e-05, + "loss": 1.3793, + "num_input_tokens_seen": 173594048, + "step": 2621 + }, + { + "epoch": 0.2453315860907006, + "loss": 1.5278196334838867, + "loss_ce": 0.009264917112886906, + "loss_iou": 0.6484375, + "loss_num": 0.044677734375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 173594048, + "step": 2621 + }, + { + "epoch": 0.24542518837459634, + "grad_norm": 35.106300354003906, + "learning_rate": 5e-05, + "loss": 1.4417, + "num_input_tokens_seen": 173660156, + "step": 2622 + }, + { + "epoch": 0.24542518837459634, + "loss": 1.4897924661636353, + "loss_ce": 0.009323794394731522, + "loss_iou": 0.62890625, + "loss_num": 0.04541015625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 173660156, + "step": 2622 + }, + { + "epoch": 0.24551879065849208, + "grad_norm": 18.99059295654297, + "learning_rate": 5e-05, + "loss": 1.864, + "num_input_tokens_seen": 173725528, + "step": 2623 + }, + { + "epoch": 0.24551879065849208, + "loss": 1.9642072916030884, + "loss_ce": 0.006199460010975599, + "loss_iou": 0.75390625, + "loss_num": 0.09033203125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 173725528, + "step": 2623 + }, + { + "epoch": 0.2456123929423878, + "grad_norm": 27.83020782470703, + "learning_rate": 5e-05, + "loss": 1.4457, + "num_input_tokens_seen": 173790980, + "step": 2624 + }, + { + "epoch": 0.2456123929423878, + "loss": 1.4178355932235718, + "loss_ce": 0.005726196337491274, + "loss_iou": 0.56640625, + "loss_num": 0.055908203125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 173790980, + "step": 2624 + }, + { + "epoch": 0.24570599522628353, + "grad_norm": 17.386829376220703, + "learning_rate": 5e-05, + "loss": 1.1944, + "num_input_tokens_seen": 173857956, + "step": 2625 + }, + { + "epoch": 0.24570599522628353, + "loss": 1.383741855621338, + "loss_ce": 0.010206678882241249, + "loss_iou": 0.56640625, + "loss_num": 0.049072265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 173857956, + "step": 2625 + }, + { + "epoch": 0.24579959751017924, + "grad_norm": 22.78047752380371, + "learning_rate": 5e-05, + "loss": 1.1274, + "num_input_tokens_seen": 173923328, + "step": 2626 + }, + { + "epoch": 0.24579959751017924, + "loss": 1.2062945365905762, + "loss_ce": 0.002406663727015257, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 173923328, + "step": 2626 + }, + { + "epoch": 0.24589319979407498, + "grad_norm": 19.326377868652344, + "learning_rate": 5e-05, + "loss": 1.1684, + "num_input_tokens_seen": 173990208, + "step": 2627 + }, + { + "epoch": 0.24589319979407498, + "loss": 1.2504380941390991, + "loss_ce": 0.0028795571997761726, + "loss_iou": 0.53515625, + "loss_num": 0.035888671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 173990208, + "step": 2627 + }, + { + "epoch": 0.2459868020779707, + "grad_norm": 63.571311950683594, + "learning_rate": 5e-05, + "loss": 1.5023, + "num_input_tokens_seen": 174055968, + "step": 2628 + }, + { + "epoch": 0.2459868020779707, + "loss": 1.4654862880706787, + "loss_ce": 0.004548734985291958, + "loss_iou": 0.62109375, + "loss_num": 0.044189453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 174055968, + "step": 2628 + }, + { + "epoch": 0.24608040436186643, + "grad_norm": 19.17978858947754, + "learning_rate": 5e-05, + "loss": 1.2832, + "num_input_tokens_seen": 174122268, + "step": 2629 + }, + { + "epoch": 0.24608040436186643, + "loss": 1.0923244953155518, + "loss_ce": 0.004433851223438978, + "loss_iou": 0.484375, + "loss_num": 0.023681640625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 174122268, + "step": 2629 + }, + { + "epoch": 0.24617400664576217, + "grad_norm": 23.298887252807617, + "learning_rate": 5e-05, + "loss": 1.3279, + "num_input_tokens_seen": 174189404, + "step": 2630 + }, + { + "epoch": 0.24617400664576217, + "loss": 1.3415188789367676, + "loss_ce": 0.006069748662412167, + "loss_iou": 0.55859375, + "loss_num": 0.04443359375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 174189404, + "step": 2630 + }, + { + "epoch": 0.24626760892965788, + "grad_norm": 40.40921401977539, + "learning_rate": 5e-05, + "loss": 1.6967, + "num_input_tokens_seen": 174255792, + "step": 2631 + }, + { + "epoch": 0.24626760892965788, + "loss": 1.7132081985473633, + "loss_ce": 0.009594895876944065, + "loss_iou": 0.67578125, + "loss_num": 0.0703125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 174255792, + "step": 2631 + }, + { + "epoch": 0.24636121121355362, + "grad_norm": 22.798112869262695, + "learning_rate": 5e-05, + "loss": 1.2834, + "num_input_tokens_seen": 174321896, + "step": 2632 + }, + { + "epoch": 0.24636121121355362, + "loss": 1.1797900199890137, + "loss_ce": 0.00478690629824996, + "loss_iou": 0.494140625, + "loss_num": 0.03759765625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 174321896, + "step": 2632 + }, + { + "epoch": 0.24645481349744933, + "grad_norm": 26.252437591552734, + "learning_rate": 5e-05, + "loss": 1.6086, + "num_input_tokens_seen": 174386764, + "step": 2633 + }, + { + "epoch": 0.24645481349744933, + "loss": 1.3368690013885498, + "loss_ce": 0.0036171525716781616, + "loss_iou": 0.5625, + "loss_num": 0.042236328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 174386764, + "step": 2633 + }, + { + "epoch": 0.24654841578134506, + "grad_norm": 31.601247787475586, + "learning_rate": 5e-05, + "loss": 1.4251, + "num_input_tokens_seen": 174452944, + "step": 2634 + }, + { + "epoch": 0.24654841578134506, + "loss": 1.297585129737854, + "loss_ce": 0.004616389982402325, + "loss_iou": 0.58203125, + "loss_num": 0.0255126953125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 174452944, + "step": 2634 + }, + { + "epoch": 0.2466420180652408, + "grad_norm": 59.829978942871094, + "learning_rate": 5e-05, + "loss": 1.5846, + "num_input_tokens_seen": 174518836, + "step": 2635 + }, + { + "epoch": 0.2466420180652408, + "loss": 1.516141653060913, + "loss_ce": 0.005399535410106182, + "loss_iou": 0.61328125, + "loss_num": 0.056396484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 174518836, + "step": 2635 + }, + { + "epoch": 0.24673562034913651, + "grad_norm": 18.393312454223633, + "learning_rate": 5e-05, + "loss": 1.2716, + "num_input_tokens_seen": 174584940, + "step": 2636 + }, + { + "epoch": 0.24673562034913651, + "loss": 1.2012829780578613, + "loss_ce": 0.007435383275151253, + "loss_iou": 0.5234375, + "loss_num": 0.0296630859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 174584940, + "step": 2636 + }, + { + "epoch": 0.24682922263303225, + "grad_norm": 21.422407150268555, + "learning_rate": 5e-05, + "loss": 1.6451, + "num_input_tokens_seen": 174650984, + "step": 2637 + }, + { + "epoch": 0.24682922263303225, + "loss": 1.4750261306762695, + "loss_ce": 0.005299532786011696, + "loss_iou": 0.625, + "loss_num": 0.04443359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 174650984, + "step": 2637 + }, + { + "epoch": 0.24692282491692796, + "grad_norm": 18.727937698364258, + "learning_rate": 5e-05, + "loss": 1.3546, + "num_input_tokens_seen": 174717668, + "step": 2638 + }, + { + "epoch": 0.24692282491692796, + "loss": 1.2518789768218994, + "loss_ce": 0.007738376036286354, + "loss_iou": 0.515625, + "loss_num": 0.042236328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 174717668, + "step": 2638 + }, + { + "epoch": 0.2470164272008237, + "grad_norm": 29.191953659057617, + "learning_rate": 5e-05, + "loss": 1.5017, + "num_input_tokens_seen": 174783284, + "step": 2639 + }, + { + "epoch": 0.2470164272008237, + "loss": 1.5968594551086426, + "loss_ce": 0.007992290891706944, + "loss_iou": 0.671875, + "loss_num": 0.048828125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 174783284, + "step": 2639 + }, + { + "epoch": 0.24711002948471944, + "grad_norm": 22.436279296875, + "learning_rate": 5e-05, + "loss": 1.6543, + "num_input_tokens_seen": 174849024, + "step": 2640 + }, + { + "epoch": 0.24711002948471944, + "loss": 1.5690126419067383, + "loss_ce": 0.006512564606964588, + "loss_iou": 0.61328125, + "loss_num": 0.06640625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 174849024, + "step": 2640 + }, + { + "epoch": 0.24720363176861515, + "grad_norm": 39.852760314941406, + "learning_rate": 5e-05, + "loss": 1.4085, + "num_input_tokens_seen": 174915840, + "step": 2641 + }, + { + "epoch": 0.24720363176861515, + "loss": 1.4357869625091553, + "loss_ce": 0.004146297927945852, + "loss_iou": 0.6171875, + "loss_num": 0.0390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 174915840, + "step": 2641 + }, + { + "epoch": 0.2472972340525109, + "grad_norm": 17.24985122680664, + "learning_rate": 5e-05, + "loss": 1.623, + "num_input_tokens_seen": 174982032, + "step": 2642 + }, + { + "epoch": 0.2472972340525109, + "loss": 1.6391594409942627, + "loss_ce": 0.0063468292355537415, + "loss_iou": 0.71484375, + "loss_num": 0.04052734375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 174982032, + "step": 2642 + }, + { + "epoch": 0.2473908363364066, + "grad_norm": 22.94183349609375, + "learning_rate": 5e-05, + "loss": 1.3532, + "num_input_tokens_seen": 175048324, + "step": 2643 + }, + { + "epoch": 0.2473908363364066, + "loss": 1.2797331809997559, + "loss_ce": 0.005807357374578714, + "loss_iou": 0.5625, + "loss_num": 0.030517578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 175048324, + "step": 2643 + }, + { + "epoch": 0.24748443862030234, + "grad_norm": 33.577816009521484, + "learning_rate": 5e-05, + "loss": 1.4464, + "num_input_tokens_seen": 175114584, + "step": 2644 + }, + { + "epoch": 0.24748443862030234, + "loss": 1.29689359664917, + "loss_ce": 0.004901424515992403, + "loss_iou": 0.57421875, + "loss_num": 0.028076171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 175114584, + "step": 2644 + }, + { + "epoch": 0.24757804090419805, + "grad_norm": 20.85687255859375, + "learning_rate": 5e-05, + "loss": 1.6549, + "num_input_tokens_seen": 175180888, + "step": 2645 + }, + { + "epoch": 0.24757804090419805, + "loss": 1.7518808841705322, + "loss_ce": 0.005787082947790623, + "loss_iou": 0.71875, + "loss_num": 0.060791015625, + "loss_xval": 1.75, + "num_input_tokens_seen": 175180888, + "step": 2645 + }, + { + "epoch": 0.2476716431880938, + "grad_norm": 17.017568588256836, + "learning_rate": 5e-05, + "loss": 1.1824, + "num_input_tokens_seen": 175246964, + "step": 2646 + }, + { + "epoch": 0.2476716431880938, + "loss": 1.1612648963928223, + "loss_ce": 0.006479742005467415, + "loss_iou": 0.5078125, + "loss_num": 0.02734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 175246964, + "step": 2646 + }, + { + "epoch": 0.24776524547198953, + "grad_norm": 27.16044044494629, + "learning_rate": 5e-05, + "loss": 1.4088, + "num_input_tokens_seen": 175313044, + "step": 2647 + }, + { + "epoch": 0.24776524547198953, + "loss": 1.6399792432785034, + "loss_ce": 0.006190221756696701, + "loss_iou": 0.66796875, + "loss_num": 0.059326171875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 175313044, + "step": 2647 + }, + { + "epoch": 0.24785884775588524, + "grad_norm": 32.581966400146484, + "learning_rate": 5e-05, + "loss": 1.3722, + "num_input_tokens_seen": 175379092, + "step": 2648 + }, + { + "epoch": 0.24785884775588524, + "loss": 1.215409517288208, + "loss_ce": 0.0037396925035864115, + "loss_iou": 0.515625, + "loss_num": 0.035888671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 175379092, + "step": 2648 + }, + { + "epoch": 0.24795245003978097, + "grad_norm": 39.34456253051758, + "learning_rate": 5e-05, + "loss": 1.4922, + "num_input_tokens_seen": 175444548, + "step": 2649 + }, + { + "epoch": 0.24795245003978097, + "loss": 1.4172290563583374, + "loss_ce": 0.006126766093075275, + "loss_iou": 0.5859375, + "loss_num": 0.0478515625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 175444548, + "step": 2649 + }, + { + "epoch": 0.24804605232367669, + "grad_norm": 23.662206649780273, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 175511196, + "step": 2650 + }, + { + "epoch": 0.24804605232367669, + "loss": 1.199338436126709, + "loss_ce": 0.011350231245160103, + "loss_iou": 0.49609375, + "loss_num": 0.0390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 175511196, + "step": 2650 + }, + { + "epoch": 0.24813965460757242, + "grad_norm": 25.820735931396484, + "learning_rate": 5e-05, + "loss": 1.6155, + "num_input_tokens_seen": 175577820, + "step": 2651 + }, + { + "epoch": 0.24813965460757242, + "loss": 1.6761157512664795, + "loss_ce": 0.010588502511382103, + "loss_iou": 0.6484375, + "loss_num": 0.07373046875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 175577820, + "step": 2651 + }, + { + "epoch": 0.24823325689146816, + "grad_norm": 31.59154510498047, + "learning_rate": 5e-05, + "loss": 1.2588, + "num_input_tokens_seen": 175644212, + "step": 2652 + }, + { + "epoch": 0.24823325689146816, + "loss": 1.0714844465255737, + "loss_ce": 0.0028809530194848776, + "loss_iou": 0.45703125, + "loss_num": 0.03076171875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 175644212, + "step": 2652 + }, + { + "epoch": 0.24832685917536387, + "grad_norm": 20.59835433959961, + "learning_rate": 5e-05, + "loss": 1.7055, + "num_input_tokens_seen": 175710016, + "step": 2653 + }, + { + "epoch": 0.24832685917536387, + "loss": 1.7338181734085083, + "loss_ce": 0.0053025586530566216, + "loss_iou": 0.70703125, + "loss_num": 0.06298828125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 175710016, + "step": 2653 + }, + { + "epoch": 0.2484204614592596, + "grad_norm": 62.92511749267578, + "learning_rate": 5e-05, + "loss": 1.279, + "num_input_tokens_seen": 175775460, + "step": 2654 + }, + { + "epoch": 0.2484204614592596, + "loss": 1.4731266498565674, + "loss_ce": 0.0038884193636476994, + "loss_iou": 0.61328125, + "loss_num": 0.04833984375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 175775460, + "step": 2654 + }, + { + "epoch": 0.24851406374315532, + "grad_norm": 27.96456527709961, + "learning_rate": 5e-05, + "loss": 1.3247, + "num_input_tokens_seen": 175841836, + "step": 2655 + }, + { + "epoch": 0.24851406374315532, + "loss": 1.4556419849395752, + "loss_ce": 0.004470153711736202, + "loss_iou": 0.6171875, + "loss_num": 0.042724609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 175841836, + "step": 2655 + }, + { + "epoch": 0.24860766602705106, + "grad_norm": 20.63963508605957, + "learning_rate": 5e-05, + "loss": 1.2664, + "num_input_tokens_seen": 175907648, + "step": 2656 + }, + { + "epoch": 0.24860766602705106, + "loss": 1.1753497123718262, + "loss_ce": 0.0049395374953746796, + "loss_iou": 0.4609375, + "loss_num": 0.050048828125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 175907648, + "step": 2656 + }, + { + "epoch": 0.2487012683109468, + "grad_norm": 28.03647804260254, + "learning_rate": 5e-05, + "loss": 1.4001, + "num_input_tokens_seen": 175974800, + "step": 2657 + }, + { + "epoch": 0.2487012683109468, + "loss": 1.455979824066162, + "loss_ce": 0.006761068478226662, + "loss_iou": 0.58984375, + "loss_num": 0.0537109375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 175974800, + "step": 2657 + }, + { + "epoch": 0.2487948705948425, + "grad_norm": 20.680822372436523, + "learning_rate": 5e-05, + "loss": 1.5617, + "num_input_tokens_seen": 176040236, + "step": 2658 + }, + { + "epoch": 0.2487948705948425, + "loss": 1.7301642894744873, + "loss_ce": 0.0045783137902617455, + "loss_iou": 0.6953125, + "loss_num": 0.06689453125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 176040236, + "step": 2658 + }, + { + "epoch": 0.24888847287873825, + "grad_norm": 14.060856819152832, + "learning_rate": 5e-05, + "loss": 1.2418, + "num_input_tokens_seen": 176106048, + "step": 2659 + }, + { + "epoch": 0.24888847287873825, + "loss": 1.3035095930099487, + "loss_ce": 0.00468143867328763, + "loss_iou": 0.5390625, + "loss_num": 0.043701171875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 176106048, + "step": 2659 + }, + { + "epoch": 0.24898207516263396, + "grad_norm": 24.060583114624023, + "learning_rate": 5e-05, + "loss": 1.6085, + "num_input_tokens_seen": 176172460, + "step": 2660 + }, + { + "epoch": 0.24898207516263396, + "loss": 1.6445289850234985, + "loss_ce": 0.007321933750063181, + "loss_iou": 0.6796875, + "loss_num": 0.05615234375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 176172460, + "step": 2660 + }, + { + "epoch": 0.2490756774465297, + "grad_norm": 17.083534240722656, + "learning_rate": 5e-05, + "loss": 1.5867, + "num_input_tokens_seen": 176237648, + "step": 2661 + }, + { + "epoch": 0.2490756774465297, + "loss": 1.48751962184906, + "loss_ce": 0.008515719324350357, + "loss_iou": 0.5625, + "loss_num": 0.0703125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 176237648, + "step": 2661 + }, + { + "epoch": 0.24916927973042544, + "grad_norm": 18.185260772705078, + "learning_rate": 5e-05, + "loss": 1.3118, + "num_input_tokens_seen": 176303192, + "step": 2662 + }, + { + "epoch": 0.24916927973042544, + "loss": 1.2686445713043213, + "loss_ce": 0.002531296107918024, + "loss_iou": 0.546875, + "loss_num": 0.03466796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 176303192, + "step": 2662 + }, + { + "epoch": 0.24926288201432115, + "grad_norm": 37.6169319152832, + "learning_rate": 5e-05, + "loss": 1.2758, + "num_input_tokens_seen": 176368208, + "step": 2663 + }, + { + "epoch": 0.24926288201432115, + "loss": 1.2598035335540771, + "loss_ce": 0.003944148309528828, + "loss_iou": 0.51953125, + "loss_num": 0.04296875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 176368208, + "step": 2663 + }, + { + "epoch": 0.24935648429821688, + "grad_norm": 21.37662696838379, + "learning_rate": 5e-05, + "loss": 1.4104, + "num_input_tokens_seen": 176434056, + "step": 2664 + }, + { + "epoch": 0.24935648429821688, + "loss": 1.366624116897583, + "loss_ce": 0.0018779993988573551, + "loss_iou": 0.5625, + "loss_num": 0.04736328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 176434056, + "step": 2664 + }, + { + "epoch": 0.2494500865821126, + "grad_norm": 29.62290382385254, + "learning_rate": 5e-05, + "loss": 1.2874, + "num_input_tokens_seen": 176500160, + "step": 2665 + }, + { + "epoch": 0.2494500865821126, + "loss": 1.2492676973342896, + "loss_ce": 0.00415048748254776, + "loss_iou": 0.50390625, + "loss_num": 0.047119140625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 176500160, + "step": 2665 + }, + { + "epoch": 0.24954368886600833, + "grad_norm": 37.912166595458984, + "learning_rate": 5e-05, + "loss": 1.4483, + "num_input_tokens_seen": 176567612, + "step": 2666 + }, + { + "epoch": 0.24954368886600833, + "loss": 1.4608955383300781, + "loss_ce": 0.00386425806209445, + "loss_iou": 0.61328125, + "loss_num": 0.04541015625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 176567612, + "step": 2666 + }, + { + "epoch": 0.24963729114990404, + "grad_norm": 27.29871368408203, + "learning_rate": 5e-05, + "loss": 1.7305, + "num_input_tokens_seen": 176633248, + "step": 2667 + }, + { + "epoch": 0.24963729114990404, + "loss": 1.5257866382598877, + "loss_ce": 0.006255402695387602, + "loss_iou": 0.66796875, + "loss_num": 0.03662109375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 176633248, + "step": 2667 + }, + { + "epoch": 0.24973089343379978, + "grad_norm": 26.80462646484375, + "learning_rate": 5e-05, + "loss": 1.5406, + "num_input_tokens_seen": 176699628, + "step": 2668 + }, + { + "epoch": 0.24973089343379978, + "loss": 1.4624890089035034, + "loss_ce": 0.003504673484712839, + "loss_iou": 0.609375, + "loss_num": 0.048095703125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 176699628, + "step": 2668 + }, + { + "epoch": 0.24982449571769552, + "grad_norm": 25.51641845703125, + "learning_rate": 5e-05, + "loss": 1.2969, + "num_input_tokens_seen": 176765148, + "step": 2669 + }, + { + "epoch": 0.24982449571769552, + "loss": 1.2877111434936523, + "loss_ce": 0.0035315491259098053, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 176765148, + "step": 2669 + }, + { + "epoch": 0.24991809800159123, + "grad_norm": 24.73008918762207, + "learning_rate": 5e-05, + "loss": 1.4034, + "num_input_tokens_seen": 176832276, + "step": 2670 + }, + { + "epoch": 0.24991809800159123, + "loss": 1.4069923162460327, + "loss_ce": 0.00367200979962945, + "loss_iou": 0.578125, + "loss_num": 0.049560546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 176832276, + "step": 2670 + }, + { + "epoch": 0.25001170028548697, + "grad_norm": 19.667558670043945, + "learning_rate": 5e-05, + "loss": 1.3253, + "num_input_tokens_seen": 176899092, + "step": 2671 + }, + { + "epoch": 0.25001170028548697, + "loss": 1.4717576503753662, + "loss_ce": 0.0098434928804636, + "loss_iou": 0.5859375, + "loss_num": 0.0576171875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 176899092, + "step": 2671 + }, + { + "epoch": 0.2501053025693827, + "grad_norm": 38.891414642333984, + "learning_rate": 5e-05, + "loss": 1.25, + "num_input_tokens_seen": 176964424, + "step": 2672 + }, + { + "epoch": 0.2501053025693827, + "loss": 1.4178887605667114, + "loss_ce": 0.0038262358866631985, + "loss_iou": 0.6171875, + "loss_num": 0.0361328125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 176964424, + "step": 2672 + }, + { + "epoch": 0.25019890485327845, + "grad_norm": 28.1275577545166, + "learning_rate": 5e-05, + "loss": 1.35, + "num_input_tokens_seen": 177029964, + "step": 2673 + }, + { + "epoch": 0.25019890485327845, + "loss": 1.209371566772461, + "loss_ce": 0.003805126529186964, + "loss_iou": 0.5078125, + "loss_num": 0.03759765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 177029964, + "step": 2673 + }, + { + "epoch": 0.25029250713717416, + "grad_norm": 21.706510543823242, + "learning_rate": 5e-05, + "loss": 1.477, + "num_input_tokens_seen": 177096660, + "step": 2674 + }, + { + "epoch": 0.25029250713717416, + "loss": 1.4117348194122314, + "loss_ce": 0.0035316282883286476, + "loss_iou": 0.609375, + "loss_num": 0.03857421875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 177096660, + "step": 2674 + }, + { + "epoch": 0.25038610942106987, + "grad_norm": 28.70165252685547, + "learning_rate": 5e-05, + "loss": 1.3448, + "num_input_tokens_seen": 177163348, + "step": 2675 + }, + { + "epoch": 0.25038610942106987, + "loss": 1.3969063758850098, + "loss_ce": 0.007257949095219374, + "loss_iou": 0.578125, + "loss_num": 0.046142578125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 177163348, + "step": 2675 + }, + { + "epoch": 0.2504797117049656, + "grad_norm": 16.7537841796875, + "learning_rate": 5e-05, + "loss": 1.32, + "num_input_tokens_seen": 177230340, + "step": 2676 + }, + { + "epoch": 0.2504797117049656, + "loss": 1.1728780269622803, + "loss_ce": 0.0053975507616996765, + "loss_iou": 0.51171875, + "loss_num": 0.029052734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 177230340, + "step": 2676 + }, + { + "epoch": 0.25057331398886135, + "grad_norm": 16.73251724243164, + "learning_rate": 5e-05, + "loss": 1.2454, + "num_input_tokens_seen": 177296248, + "step": 2677 + }, + { + "epoch": 0.25057331398886135, + "loss": 0.9803268909454346, + "loss_ce": 0.0032761115580797195, + "loss_iou": 0.388671875, + "loss_num": 0.0400390625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 177296248, + "step": 2677 + }, + { + "epoch": 0.25066691627275706, + "grad_norm": 36.54281234741211, + "learning_rate": 5e-05, + "loss": 1.6695, + "num_input_tokens_seen": 177362724, + "step": 2678 + }, + { + "epoch": 0.25066691627275706, + "loss": 1.7774443626403809, + "loss_ce": 0.004983500111848116, + "loss_iou": 0.72265625, + "loss_num": 0.0654296875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 177362724, + "step": 2678 + }, + { + "epoch": 0.25076051855665277, + "grad_norm": 20.147615432739258, + "learning_rate": 5e-05, + "loss": 1.416, + "num_input_tokens_seen": 177427576, + "step": 2679 + }, + { + "epoch": 0.25076051855665277, + "loss": 1.381352424621582, + "loss_ce": 0.009282177314162254, + "loss_iou": 0.5703125, + "loss_num": 0.046630859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 177427576, + "step": 2679 + }, + { + "epoch": 0.25085412084054853, + "grad_norm": 52.44255065917969, + "learning_rate": 5e-05, + "loss": 1.335, + "num_input_tokens_seen": 177494076, + "step": 2680 + }, + { + "epoch": 0.25085412084054853, + "loss": 1.235190510749817, + "loss_ce": 0.007163163274526596, + "loss_iou": 0.4921875, + "loss_num": 0.04833984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 177494076, + "step": 2680 + }, + { + "epoch": 0.25094772312444424, + "grad_norm": 23.108659744262695, + "learning_rate": 5e-05, + "loss": 1.303, + "num_input_tokens_seen": 177559392, + "step": 2681 + }, + { + "epoch": 0.25094772312444424, + "loss": 1.3295401334762573, + "loss_ce": 0.007030394859611988, + "loss_iou": 0.490234375, + "loss_num": 0.068359375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 177559392, + "step": 2681 + }, + { + "epoch": 0.25104132540833995, + "grad_norm": 46.50721740722656, + "learning_rate": 5e-05, + "loss": 1.5887, + "num_input_tokens_seen": 177625180, + "step": 2682 + }, + { + "epoch": 0.25104132540833995, + "loss": 1.9244065284729004, + "loss_ce": 0.007414279039949179, + "loss_iou": 0.7890625, + "loss_num": 0.06689453125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 177625180, + "step": 2682 + }, + { + "epoch": 0.25113492769223567, + "grad_norm": 20.515562057495117, + "learning_rate": 5e-05, + "loss": 1.9332, + "num_input_tokens_seen": 177691284, + "step": 2683 + }, + { + "epoch": 0.25113492769223567, + "loss": 2.0065956115722656, + "loss_ce": 0.004642449785023928, + "loss_iou": 0.82421875, + "loss_num": 0.06982421875, + "loss_xval": 2.0, + "num_input_tokens_seen": 177691284, + "step": 2683 + }, + { + "epoch": 0.25122852997613143, + "grad_norm": 29.184473037719727, + "learning_rate": 5e-05, + "loss": 1.2766, + "num_input_tokens_seen": 177757404, + "step": 2684 + }, + { + "epoch": 0.25122852997613143, + "loss": 1.3202893733978271, + "loss_ce": 0.0024182756897062063, + "loss_iou": 0.546875, + "loss_num": 0.045654296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 177757404, + "step": 2684 + }, + { + "epoch": 0.25132213226002714, + "grad_norm": 29.344213485717773, + "learning_rate": 5e-05, + "loss": 1.4858, + "num_input_tokens_seen": 177822436, + "step": 2685 + }, + { + "epoch": 0.25132213226002714, + "loss": 1.5259044170379639, + "loss_ce": 0.006861462257802486, + "loss_iou": 0.62109375, + "loss_num": 0.05517578125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 177822436, + "step": 2685 + }, + { + "epoch": 0.25141573454392285, + "grad_norm": 23.936859130859375, + "learning_rate": 5e-05, + "loss": 1.55, + "num_input_tokens_seen": 177887576, + "step": 2686 + }, + { + "epoch": 0.25141573454392285, + "loss": 1.5547282695770264, + "loss_ce": 0.006876666098833084, + "loss_iou": 0.59375, + "loss_num": 0.0712890625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 177887576, + "step": 2686 + }, + { + "epoch": 0.2515093368278186, + "grad_norm": 23.946048736572266, + "learning_rate": 5e-05, + "loss": 1.2561, + "num_input_tokens_seen": 177953140, + "step": 2687 + }, + { + "epoch": 0.2515093368278186, + "loss": 1.2796629667282104, + "loss_ce": 0.0067137982696294785, + "loss_iou": 0.51171875, + "loss_num": 0.049560546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 177953140, + "step": 2687 + }, + { + "epoch": 0.25160293911171433, + "grad_norm": 21.224424362182617, + "learning_rate": 5e-05, + "loss": 1.1258, + "num_input_tokens_seen": 178019340, + "step": 2688 + }, + { + "epoch": 0.25160293911171433, + "loss": 1.2026559114456177, + "loss_ce": 0.006366831250488758, + "loss_iou": 0.427734375, + "loss_num": 0.06787109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 178019340, + "step": 2688 + }, + { + "epoch": 0.25169654139561004, + "grad_norm": 26.06874656677246, + "learning_rate": 5e-05, + "loss": 1.326, + "num_input_tokens_seen": 178085288, + "step": 2689 + }, + { + "epoch": 0.25169654139561004, + "loss": 1.1321706771850586, + "loss_ce": 0.006194021087139845, + "loss_iou": 0.46875, + "loss_num": 0.0380859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 178085288, + "step": 2689 + }, + { + "epoch": 0.2517901436795058, + "grad_norm": 38.97039031982422, + "learning_rate": 5e-05, + "loss": 1.5214, + "num_input_tokens_seen": 178151204, + "step": 2690 + }, + { + "epoch": 0.2517901436795058, + "loss": 1.8669145107269287, + "loss_ce": 0.004609842784702778, + "loss_iou": 0.74609375, + "loss_num": 0.07373046875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 178151204, + "step": 2690 + }, + { + "epoch": 0.2518837459634015, + "grad_norm": 22.12947654724121, + "learning_rate": 5e-05, + "loss": 1.4491, + "num_input_tokens_seen": 178218592, + "step": 2691 + }, + { + "epoch": 0.2518837459634015, + "loss": 1.5539872646331787, + "loss_ce": 0.007600470911711454, + "loss_iou": 0.65234375, + "loss_num": 0.048095703125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 178218592, + "step": 2691 + }, + { + "epoch": 0.25197734824729723, + "grad_norm": 17.946393966674805, + "learning_rate": 5e-05, + "loss": 1.4946, + "num_input_tokens_seen": 178284356, + "step": 2692 + }, + { + "epoch": 0.25197734824729723, + "loss": 1.4267549514770508, + "loss_ce": 0.009762648493051529, + "loss_iou": 0.609375, + "loss_num": 0.0400390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 178284356, + "step": 2692 + }, + { + "epoch": 0.25207095053119294, + "grad_norm": 29.452123641967773, + "learning_rate": 5e-05, + "loss": 1.4284, + "num_input_tokens_seen": 178351192, + "step": 2693 + }, + { + "epoch": 0.25207095053119294, + "loss": 1.3941285610198975, + "loss_ce": 0.005944958887994289, + "loss_iou": 0.5390625, + "loss_num": 0.0615234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 178351192, + "step": 2693 + }, + { + "epoch": 0.2521645528150887, + "grad_norm": 22.85008430480957, + "learning_rate": 5e-05, + "loss": 1.5017, + "num_input_tokens_seen": 178417352, + "step": 2694 + }, + { + "epoch": 0.2521645528150887, + "loss": 1.587036371231079, + "loss_ce": 0.007446564733982086, + "loss_iou": 0.6328125, + "loss_num": 0.0625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 178417352, + "step": 2694 + }, + { + "epoch": 0.2522581550989844, + "grad_norm": 16.949966430664062, + "learning_rate": 5e-05, + "loss": 1.555, + "num_input_tokens_seen": 178483176, + "step": 2695 + }, + { + "epoch": 0.2522581550989844, + "loss": 1.4507124423980713, + "loss_ce": 0.010282731615006924, + "loss_iou": 0.60546875, + "loss_num": 0.04638671875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 178483176, + "step": 2695 + }, + { + "epoch": 0.2523517573828801, + "grad_norm": 25.912893295288086, + "learning_rate": 5e-05, + "loss": 1.3964, + "num_input_tokens_seen": 178549464, + "step": 2696 + }, + { + "epoch": 0.2523517573828801, + "loss": 1.68153977394104, + "loss_ce": 0.009664775803685188, + "loss_iou": 0.68359375, + "loss_num": 0.060791015625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 178549464, + "step": 2696 + }, + { + "epoch": 0.2524453596667759, + "grad_norm": 27.0345516204834, + "learning_rate": 5e-05, + "loss": 1.45, + "num_input_tokens_seen": 178615592, + "step": 2697 + }, + { + "epoch": 0.2524453596667759, + "loss": 1.2778229713439941, + "loss_ce": 0.00438541267067194, + "loss_iou": 0.5078125, + "loss_num": 0.0517578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 178615592, + "step": 2697 + }, + { + "epoch": 0.2525389619506716, + "grad_norm": 18.903898239135742, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 178682416, + "step": 2698 + }, + { + "epoch": 0.2525389619506716, + "loss": 1.453385591506958, + "loss_ce": 0.0051433793269097805, + "loss_iou": 0.62890625, + "loss_num": 0.03857421875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 178682416, + "step": 2698 + }, + { + "epoch": 0.2526325642345673, + "grad_norm": 82.03213500976562, + "learning_rate": 5e-05, + "loss": 1.2916, + "num_input_tokens_seen": 178748840, + "step": 2699 + }, + { + "epoch": 0.2526325642345673, + "loss": 1.226028561592102, + "loss_ce": 0.003860627766698599, + "loss_iou": 0.4765625, + "loss_num": 0.053466796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 178748840, + "step": 2699 + }, + { + "epoch": 0.252726166518463, + "grad_norm": 21.426063537597656, + "learning_rate": 5e-05, + "loss": 1.2913, + "num_input_tokens_seen": 178815888, + "step": 2700 + }, + { + "epoch": 0.252726166518463, + "loss": 1.0827887058258057, + "loss_ce": 0.002222394570708275, + "loss_iou": 0.458984375, + "loss_num": 0.032470703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 178815888, + "step": 2700 + }, + { + "epoch": 0.2528197688023588, + "grad_norm": 35.58570861816406, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 178882084, + "step": 2701 + }, + { + "epoch": 0.2528197688023588, + "loss": 1.2529616355895996, + "loss_ce": 0.001496872864663601, + "loss_iou": 0.54296875, + "loss_num": 0.03271484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 178882084, + "step": 2701 + }, + { + "epoch": 0.2529133710862545, + "grad_norm": 17.67931365966797, + "learning_rate": 5e-05, + "loss": 1.7459, + "num_input_tokens_seen": 178948252, + "step": 2702 + }, + { + "epoch": 0.2529133710862545, + "loss": 1.6037399768829346, + "loss_ce": 0.004130593966692686, + "loss_iou": 0.68359375, + "loss_num": 0.046142578125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 178948252, + "step": 2702 + }, + { + "epoch": 0.2530069733701502, + "grad_norm": 17.60394859313965, + "learning_rate": 5e-05, + "loss": 1.4507, + "num_input_tokens_seen": 179014804, + "step": 2703 + }, + { + "epoch": 0.2530069733701502, + "loss": 1.5327136516571045, + "loss_ce": 0.002928511705249548, + "loss_iou": 0.59375, + "loss_num": 0.06884765625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 179014804, + "step": 2703 + }, + { + "epoch": 0.253100575654046, + "grad_norm": 17.0250244140625, + "learning_rate": 5e-05, + "loss": 1.4212, + "num_input_tokens_seen": 179081268, + "step": 2704 + }, + { + "epoch": 0.253100575654046, + "loss": 1.2025288343429565, + "loss_ce": 0.00331004592590034, + "loss_iou": 0.48046875, + "loss_num": 0.04736328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 179081268, + "step": 2704 + }, + { + "epoch": 0.2531941779379417, + "grad_norm": 33.405540466308594, + "learning_rate": 5e-05, + "loss": 1.4586, + "num_input_tokens_seen": 179147568, + "step": 2705 + }, + { + "epoch": 0.2531941779379417, + "loss": 1.2397079467773438, + "loss_ce": 0.0077743493020534515, + "loss_iou": 0.427734375, + "loss_num": 0.0751953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 179147568, + "step": 2705 + }, + { + "epoch": 0.2532877802218374, + "grad_norm": 21.995677947998047, + "learning_rate": 5e-05, + "loss": 1.4927, + "num_input_tokens_seen": 179212900, + "step": 2706 + }, + { + "epoch": 0.2532877802218374, + "loss": 1.6095725297927856, + "loss_ce": 0.008498257957398891, + "loss_iou": 0.65234375, + "loss_num": 0.0595703125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 179212900, + "step": 2706 + }, + { + "epoch": 0.25338138250573317, + "grad_norm": 35.696563720703125, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 179279756, + "step": 2707 + }, + { + "epoch": 0.25338138250573317, + "loss": 1.4066588878631592, + "loss_ce": 0.0043151117861270905, + "loss_iou": 0.6171875, + "loss_num": 0.033447265625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 179279756, + "step": 2707 + }, + { + "epoch": 0.2534749847896289, + "grad_norm": 18.243179321289062, + "learning_rate": 5e-05, + "loss": 1.611, + "num_input_tokens_seen": 179346584, + "step": 2708 + }, + { + "epoch": 0.2534749847896289, + "loss": 1.4987397193908691, + "loss_ce": 0.003622512798756361, + "loss_iou": 0.6484375, + "loss_num": 0.03955078125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 179346584, + "step": 2708 + }, + { + "epoch": 0.2535685870735246, + "grad_norm": 19.568614959716797, + "learning_rate": 5e-05, + "loss": 1.3211, + "num_input_tokens_seen": 179412188, + "step": 2709 + }, + { + "epoch": 0.2535685870735246, + "loss": 1.2492930889129639, + "loss_ce": 0.004664166830480099, + "loss_iou": 0.515625, + "loss_num": 0.043212890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 179412188, + "step": 2709 + }, + { + "epoch": 0.2536621893574203, + "grad_norm": 37.28907775878906, + "learning_rate": 5e-05, + "loss": 1.1901, + "num_input_tokens_seen": 179477196, + "step": 2710 + }, + { + "epoch": 0.2536621893574203, + "loss": 1.176241159439087, + "loss_ce": 0.0024130609817802906, + "loss_iou": 0.486328125, + "loss_num": 0.0400390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 179477196, + "step": 2710 + }, + { + "epoch": 0.25375579164131606, + "grad_norm": 21.42214012145996, + "learning_rate": 5e-05, + "loss": 1.6679, + "num_input_tokens_seen": 179542548, + "step": 2711 + }, + { + "epoch": 0.25375579164131606, + "loss": 1.4606550931930542, + "loss_ce": 0.0041120825335383415, + "loss_iou": 0.5703125, + "loss_num": 0.06298828125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 179542548, + "step": 2711 + }, + { + "epoch": 0.2538493939252118, + "grad_norm": 31.344505310058594, + "learning_rate": 5e-05, + "loss": 1.3836, + "num_input_tokens_seen": 179609724, + "step": 2712 + }, + { + "epoch": 0.2538493939252118, + "loss": 1.4715662002563477, + "loss_ce": 0.007699033711105585, + "loss_iou": 0.59765625, + "loss_num": 0.0537109375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 179609724, + "step": 2712 + }, + { + "epoch": 0.2539429962091075, + "grad_norm": 36.948455810546875, + "learning_rate": 5e-05, + "loss": 1.5696, + "num_input_tokens_seen": 179676504, + "step": 2713 + }, + { + "epoch": 0.2539429962091075, + "loss": 1.5433683395385742, + "loss_ce": 0.003329339437186718, + "loss_iou": 0.66796875, + "loss_num": 0.04052734375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 179676504, + "step": 2713 + }, + { + "epoch": 0.25403659849300325, + "grad_norm": 18.40770721435547, + "learning_rate": 5e-05, + "loss": 1.4374, + "num_input_tokens_seen": 179742200, + "step": 2714 + }, + { + "epoch": 0.25403659849300325, + "loss": 1.5147056579589844, + "loss_ce": 0.0020102495327591896, + "loss_iou": 0.6328125, + "loss_num": 0.04931640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 179742200, + "step": 2714 + }, + { + "epoch": 0.25413020077689896, + "grad_norm": 17.650564193725586, + "learning_rate": 5e-05, + "loss": 1.2574, + "num_input_tokens_seen": 179807896, + "step": 2715 + }, + { + "epoch": 0.25413020077689896, + "loss": 1.2243345975875854, + "loss_ce": 0.0050962925888597965, + "loss_iou": 0.52734375, + "loss_num": 0.0322265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 179807896, + "step": 2715 + }, + { + "epoch": 0.2542238030607947, + "grad_norm": 22.725345611572266, + "learning_rate": 5e-05, + "loss": 1.1883, + "num_input_tokens_seen": 179873904, + "step": 2716 + }, + { + "epoch": 0.2542238030607947, + "loss": 1.20273756980896, + "loss_ce": 0.008310049772262573, + "loss_iou": 0.458984375, + "loss_num": 0.054931640625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 179873904, + "step": 2716 + }, + { + "epoch": 0.2543174053446904, + "grad_norm": 36.27304458618164, + "learning_rate": 5e-05, + "loss": 1.4144, + "num_input_tokens_seen": 179941544, + "step": 2717 + }, + { + "epoch": 0.2543174053446904, + "loss": 1.3572330474853516, + "loss_ce": 0.004693983122706413, + "loss_iou": 0.59765625, + "loss_num": 0.0311279296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 179941544, + "step": 2717 + }, + { + "epoch": 0.25441100762858615, + "grad_norm": 23.77774429321289, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 180007776, + "step": 2718 + }, + { + "epoch": 0.25441100762858615, + "loss": 1.396384596824646, + "loss_ce": 0.009177593514323235, + "loss_iou": 0.58984375, + "loss_num": 0.041015625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 180007776, + "step": 2718 + }, + { + "epoch": 0.25450460991248186, + "grad_norm": 16.692039489746094, + "learning_rate": 5e-05, + "loss": 1.4522, + "num_input_tokens_seen": 180074020, + "step": 2719 + }, + { + "epoch": 0.25450460991248186, + "loss": 1.4519904851913452, + "loss_ce": 0.0047248369082808495, + "loss_iou": 0.5859375, + "loss_num": 0.05517578125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 180074020, + "step": 2719 + }, + { + "epoch": 0.25459821219637757, + "grad_norm": 19.383583068847656, + "learning_rate": 5e-05, + "loss": 1.2771, + "num_input_tokens_seen": 180139932, + "step": 2720 + }, + { + "epoch": 0.25459821219637757, + "loss": 1.0887398719787598, + "loss_ce": 0.005243788007646799, + "loss_iou": 0.443359375, + "loss_num": 0.0390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 180139932, + "step": 2720 + }, + { + "epoch": 0.25469181448027334, + "grad_norm": 18.619901657104492, + "learning_rate": 5e-05, + "loss": 1.3926, + "num_input_tokens_seen": 180205940, + "step": 2721 + }, + { + "epoch": 0.25469181448027334, + "loss": 1.3840503692626953, + "loss_ce": 0.004167487379163504, + "loss_iou": 0.52734375, + "loss_num": 0.0654296875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 180205940, + "step": 2721 + }, + { + "epoch": 0.25478541676416905, + "grad_norm": 31.04913330078125, + "learning_rate": 5e-05, + "loss": 1.5844, + "num_input_tokens_seen": 180272048, + "step": 2722 + }, + { + "epoch": 0.25478541676416905, + "loss": 1.5141956806182861, + "loss_ce": 0.006871445570141077, + "loss_iou": 0.62890625, + "loss_num": 0.05029296875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 180272048, + "step": 2722 + }, + { + "epoch": 0.25487901904806476, + "grad_norm": 43.517356872558594, + "learning_rate": 5e-05, + "loss": 1.3605, + "num_input_tokens_seen": 180338248, + "step": 2723 + }, + { + "epoch": 0.25487901904806476, + "loss": 1.1855039596557617, + "loss_ce": 0.0045347800478339195, + "loss_iou": 0.484375, + "loss_num": 0.04296875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 180338248, + "step": 2723 + }, + { + "epoch": 0.2549726213319605, + "grad_norm": 25.927207946777344, + "learning_rate": 5e-05, + "loss": 1.4294, + "num_input_tokens_seen": 180403364, + "step": 2724 + }, + { + "epoch": 0.2549726213319605, + "loss": 1.2973136901855469, + "loss_ce": 0.00727462861686945, + "loss_iou": 0.54296875, + "loss_num": 0.040283203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 180403364, + "step": 2724 + }, + { + "epoch": 0.25506622361585624, + "grad_norm": 40.147369384765625, + "learning_rate": 5e-05, + "loss": 1.2546, + "num_input_tokens_seen": 180470304, + "step": 2725 + }, + { + "epoch": 0.25506622361585624, + "loss": 0.9937068223953247, + "loss_ce": 0.0034723724238574505, + "loss_iou": 0.392578125, + "loss_num": 0.04052734375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 180470304, + "step": 2725 + }, + { + "epoch": 0.25515982589975195, + "grad_norm": 12.200100898742676, + "learning_rate": 5e-05, + "loss": 1.5515, + "num_input_tokens_seen": 180536584, + "step": 2726 + }, + { + "epoch": 0.25515982589975195, + "loss": 1.5923805236816406, + "loss_ce": 0.002536846324801445, + "loss_iou": 0.65625, + "loss_num": 0.0556640625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 180536584, + "step": 2726 + }, + { + "epoch": 0.25525342818364766, + "grad_norm": 35.998226165771484, + "learning_rate": 5e-05, + "loss": 1.3385, + "num_input_tokens_seen": 180603080, + "step": 2727 + }, + { + "epoch": 0.25525342818364766, + "loss": 1.5598630905151367, + "loss_ce": 0.005175570957362652, + "loss_iou": 0.609375, + "loss_num": 0.06689453125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 180603080, + "step": 2727 + }, + { + "epoch": 0.2553470304675434, + "grad_norm": 42.222957611083984, + "learning_rate": 5e-05, + "loss": 1.4882, + "num_input_tokens_seen": 180669728, + "step": 2728 + }, + { + "epoch": 0.2553470304675434, + "loss": 1.555783748626709, + "loss_ce": 0.0069555919617414474, + "loss_iou": 0.6484375, + "loss_num": 0.050537109375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 180669728, + "step": 2728 + }, + { + "epoch": 0.25544063275143913, + "grad_norm": 24.986804962158203, + "learning_rate": 5e-05, + "loss": 1.6722, + "num_input_tokens_seen": 180736492, + "step": 2729 + }, + { + "epoch": 0.25544063275143913, + "loss": 1.943702220916748, + "loss_ce": 0.002295971615239978, + "loss_iou": 0.8203125, + "loss_num": 0.06103515625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 180736492, + "step": 2729 + }, + { + "epoch": 0.25553423503533484, + "grad_norm": 26.335451126098633, + "learning_rate": 5e-05, + "loss": 1.2464, + "num_input_tokens_seen": 180802144, + "step": 2730 + }, + { + "epoch": 0.25553423503533484, + "loss": 1.263014554977417, + "loss_ce": 0.004652736242860556, + "loss_iou": 0.49609375, + "loss_num": 0.052978515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 180802144, + "step": 2730 + }, + { + "epoch": 0.2556278373192306, + "grad_norm": 20.515073776245117, + "learning_rate": 5e-05, + "loss": 1.4534, + "num_input_tokens_seen": 180868048, + "step": 2731 + }, + { + "epoch": 0.2556278373192306, + "loss": 1.412936806678772, + "loss_ce": 0.002780516864731908, + "loss_iou": 0.59375, + "loss_num": 0.044189453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 180868048, + "step": 2731 + }, + { + "epoch": 0.2557214396031263, + "grad_norm": 31.924388885498047, + "learning_rate": 5e-05, + "loss": 1.4156, + "num_input_tokens_seen": 180934952, + "step": 2732 + }, + { + "epoch": 0.2557214396031263, + "loss": 1.2068747282028198, + "loss_ce": 0.005702856928110123, + "loss_iou": 0.515625, + "loss_num": 0.034423828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 180934952, + "step": 2732 + }, + { + "epoch": 0.25581504188702203, + "grad_norm": 22.622175216674805, + "learning_rate": 5e-05, + "loss": 1.5098, + "num_input_tokens_seen": 181001384, + "step": 2733 + }, + { + "epoch": 0.25581504188702203, + "loss": 1.5945289134979248, + "loss_ce": 0.0027319621294736862, + "loss_iou": 0.6640625, + "loss_num": 0.053466796875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 181001384, + "step": 2733 + }, + { + "epoch": 0.25590864417091774, + "grad_norm": 81.16958618164062, + "learning_rate": 5e-05, + "loss": 1.3243, + "num_input_tokens_seen": 181067248, + "step": 2734 + }, + { + "epoch": 0.25590864417091774, + "loss": 1.202168345451355, + "loss_ce": 0.003926113247871399, + "loss_iou": 0.51171875, + "loss_num": 0.03466796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 181067248, + "step": 2734 + }, + { + "epoch": 0.2560022464548135, + "grad_norm": 43.052371978759766, + "learning_rate": 5e-05, + "loss": 1.3143, + "num_input_tokens_seen": 181133096, + "step": 2735 + }, + { + "epoch": 0.2560022464548135, + "loss": 1.3479100465774536, + "loss_ce": 0.005136595107614994, + "loss_iou": 0.578125, + "loss_num": 0.03759765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 181133096, + "step": 2735 + }, + { + "epoch": 0.2560958487387092, + "grad_norm": 23.68447494506836, + "learning_rate": 5e-05, + "loss": 1.2908, + "num_input_tokens_seen": 181199216, + "step": 2736 + }, + { + "epoch": 0.2560958487387092, + "loss": 1.4186744689941406, + "loss_ce": 0.002170453779399395, + "loss_iou": 0.609375, + "loss_num": 0.0390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 181199216, + "step": 2736 + }, + { + "epoch": 0.25618945102260493, + "grad_norm": 24.601858139038086, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 181266216, + "step": 2737 + }, + { + "epoch": 0.25618945102260493, + "loss": 1.2716262340545654, + "loss_ce": 0.0030715835746377707, + "loss_iou": 0.54296875, + "loss_num": 0.03662109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 181266216, + "step": 2737 + }, + { + "epoch": 0.2562830533065007, + "grad_norm": 35.577396392822266, + "learning_rate": 5e-05, + "loss": 1.2007, + "num_input_tokens_seen": 181331656, + "step": 2738 + }, + { + "epoch": 0.2562830533065007, + "loss": 1.2889866828918457, + "loss_ce": 0.004318690858781338, + "loss_iou": 0.53125, + "loss_num": 0.04541015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 181331656, + "step": 2738 + }, + { + "epoch": 0.2563766555903964, + "grad_norm": 22.694122314453125, + "learning_rate": 5e-05, + "loss": 1.6981, + "num_input_tokens_seen": 181396848, + "step": 2739 + }, + { + "epoch": 0.2563766555903964, + "loss": 1.7223371267318726, + "loss_ce": 0.008469952270388603, + "loss_iou": 0.7265625, + "loss_num": 0.051513671875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 181396848, + "step": 2739 + }, + { + "epoch": 0.2564702578742921, + "grad_norm": 18.788219451904297, + "learning_rate": 5e-05, + "loss": 1.2121, + "num_input_tokens_seen": 181464172, + "step": 2740 + }, + { + "epoch": 0.2564702578742921, + "loss": 1.1174161434173584, + "loss_ce": 0.004623170010745525, + "loss_iou": 0.482421875, + "loss_num": 0.029541015625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 181464172, + "step": 2740 + }, + { + "epoch": 0.2565638601581879, + "grad_norm": 21.85161590576172, + "learning_rate": 5e-05, + "loss": 1.3487, + "num_input_tokens_seen": 181530412, + "step": 2741 + }, + { + "epoch": 0.2565638601581879, + "loss": 1.582802653312683, + "loss_ce": 0.005166033748537302, + "loss_iou": 0.63671875, + "loss_num": 0.060546875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 181530412, + "step": 2741 + }, + { + "epoch": 0.2566574624420836, + "grad_norm": 21.856889724731445, + "learning_rate": 5e-05, + "loss": 1.261, + "num_input_tokens_seen": 181595716, + "step": 2742 + }, + { + "epoch": 0.2566574624420836, + "loss": 1.238128662109375, + "loss_ce": 0.005218507722020149, + "loss_iou": 0.462890625, + "loss_num": 0.0615234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 181595716, + "step": 2742 + }, + { + "epoch": 0.2567510647259793, + "grad_norm": 28.9006290435791, + "learning_rate": 5e-05, + "loss": 1.3331, + "num_input_tokens_seen": 181661020, + "step": 2743 + }, + { + "epoch": 0.2567510647259793, + "loss": 1.3242883682250977, + "loss_ce": 0.0044641438871622086, + "loss_iou": 0.5234375, + "loss_num": 0.054443359375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 181661020, + "step": 2743 + }, + { + "epoch": 0.256844667009875, + "grad_norm": 36.75323486328125, + "learning_rate": 5e-05, + "loss": 1.388, + "num_input_tokens_seen": 181724828, + "step": 2744 + }, + { + "epoch": 0.256844667009875, + "loss": 1.3701030015945435, + "loss_ce": 0.003892068285495043, + "loss_iou": 0.57421875, + "loss_num": 0.04443359375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 181724828, + "step": 2744 + }, + { + "epoch": 0.2569382692937708, + "grad_norm": 17.429616928100586, + "learning_rate": 5e-05, + "loss": 1.7901, + "num_input_tokens_seen": 181791348, + "step": 2745 + }, + { + "epoch": 0.2569382692937708, + "loss": 1.7535079717636108, + "loss_ce": 0.003385887946933508, + "loss_iou": 0.7265625, + "loss_num": 0.05859375, + "loss_xval": 1.75, + "num_input_tokens_seen": 181791348, + "step": 2745 + }, + { + "epoch": 0.2570318715776665, + "grad_norm": 17.206226348876953, + "learning_rate": 5e-05, + "loss": 1.6154, + "num_input_tokens_seen": 181859036, + "step": 2746 + }, + { + "epoch": 0.2570318715776665, + "loss": 1.644707202911377, + "loss_ce": 0.007011887151747942, + "loss_iou": 0.65625, + "loss_num": 0.0654296875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 181859036, + "step": 2746 + }, + { + "epoch": 0.2571254738615622, + "grad_norm": 28.9249324798584, + "learning_rate": 5e-05, + "loss": 1.316, + "num_input_tokens_seen": 181925472, + "step": 2747 + }, + { + "epoch": 0.2571254738615622, + "loss": 1.2439528703689575, + "loss_ce": 0.002009489107877016, + "loss_iou": 0.49609375, + "loss_num": 0.05029296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 181925472, + "step": 2747 + }, + { + "epoch": 0.25721907614545797, + "grad_norm": 28.013105392456055, + "learning_rate": 5e-05, + "loss": 1.3691, + "num_input_tokens_seen": 181992116, + "step": 2748 + }, + { + "epoch": 0.25721907614545797, + "loss": 1.388258695602417, + "loss_ce": 0.005446320399641991, + "loss_iou": 0.546875, + "loss_num": 0.0576171875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 181992116, + "step": 2748 + }, + { + "epoch": 0.2573126784293537, + "grad_norm": 35.12394714355469, + "learning_rate": 5e-05, + "loss": 1.4747, + "num_input_tokens_seen": 182057936, + "step": 2749 + }, + { + "epoch": 0.2573126784293537, + "loss": 1.5037202835083008, + "loss_ce": 0.0037203137762844563, + "loss_iou": 0.63671875, + "loss_num": 0.045654296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 182057936, + "step": 2749 + }, + { + "epoch": 0.2574062807132494, + "grad_norm": 21.23040008544922, + "learning_rate": 5e-05, + "loss": 1.5964, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "eval_seeclick_CIoU": 0.1042308509349823, + "eval_seeclick_GIoU": 0.11520424857735634, + "eval_seeclick_IoU": 0.24981697648763657, + "eval_seeclick_MAE_all": 0.17666172981262207, + "eval_seeclick_MAE_h": 0.08429580554366112, + "eval_seeclick_MAE_w": 0.12163790687918663, + "eval_seeclick_MAE_x_boxes": 0.2853519022464752, + "eval_seeclick_MAE_y_boxes": 0.17233942449092865, + "eval_seeclick_NUM_probability": 0.9995305836200714, + "eval_seeclick_inside_bbox": 0.3895833343267441, + "eval_seeclick_loss": 2.7157585620880127, + "eval_seeclick_loss_ce": 0.014326652977615595, + "eval_seeclick_loss_iou": 0.934326171875, + "eval_seeclick_loss_num": 0.17431640625, + "eval_seeclick_loss_xval": 2.73876953125, + "eval_seeclick_runtime": 68.5211, + "eval_seeclick_samples_per_second": 0.686, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "eval_icons_CIoU": -0.09721673280000687, + "eval_icons_GIoU": -0.002693554386496544, + "eval_icons_IoU": 0.0957326740026474, + "eval_icons_MAE_all": 0.18497255444526672, + "eval_icons_MAE_h": 0.139219768345356, + "eval_icons_MAE_w": 0.16029571741819382, + "eval_icons_MAE_x_boxes": 0.1328095756471157, + "eval_icons_MAE_y_boxes": 0.14838873222470284, + "eval_icons_NUM_probability": 0.9932429194450378, + "eval_icons_inside_bbox": 0.13715277798473835, + "eval_icons_loss": 2.913668155670166, + "eval_icons_loss_ce": 0.0013117790513206273, + "eval_icons_loss_iou": 1.00439453125, + "eval_icons_loss_num": 0.1877899169921875, + "eval_icons_loss_xval": 2.947265625, + "eval_icons_runtime": 92.2466, + "eval_icons_samples_per_second": 0.542, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "eval_screenspot_CIoU": -0.0036787251010537148, + "eval_screenspot_GIoU": 0.03668480211248001, + "eval_screenspot_IoU": 0.16975745558738708, + "eval_screenspot_MAE_all": 0.19455142815907797, + "eval_screenspot_MAE_h": 0.1320437416434288, + "eval_screenspot_MAE_w": 0.16170996675888696, + "eval_screenspot_MAE_x_boxes": 0.26663068930308026, + "eval_screenspot_MAE_y_boxes": 0.13205540676911673, + "eval_screenspot_NUM_probability": 0.99985804160436, + "eval_screenspot_inside_bbox": 0.3283333381017049, + "eval_screenspot_loss": 2.9288907051086426, + "eval_screenspot_loss_ce": 0.008995980645219484, + "eval_screenspot_loss_iou": 0.9759114583333334, + "eval_screenspot_loss_num": 0.19524129231770834, + "eval_screenspot_loss_xval": 2.9270833333333335, + "eval_screenspot_runtime": 132.5325, + "eval_screenspot_samples_per_second": 0.672, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "eval_compot_CIoU": -0.029431015253067017, + "eval_compot_GIoU": 0.039341045543551445, + "eval_compot_IoU": 0.14003757759928703, + "eval_compot_MAE_all": 0.19349998235702515, + "eval_compot_MAE_h": 0.07970436103641987, + "eval_compot_MAE_w": 0.21002302691340446, + "eval_compot_MAE_x_boxes": 0.19608338177204132, + "eval_compot_MAE_y_boxes": 0.1365150660276413, + "eval_compot_NUM_probability": 0.9998738169670105, + "eval_compot_inside_bbox": 0.2447916716337204, + "eval_compot_loss": 2.974236488342285, + "eval_compot_loss_ce": 0.0023781840573064983, + "eval_compot_loss_iou": 0.98388671875, + "eval_compot_loss_num": 0.204254150390625, + "eval_compot_loss_xval": 2.98681640625, + "eval_compot_runtime": 74.0009, + "eval_compot_samples_per_second": 0.676, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "eval_custom_ui_MAE_all": 0.15309859812259674, + "eval_custom_ui_MAE_x": 0.1523282527923584, + "eval_custom_ui_MAE_y": 0.15386895462870598, + "eval_custom_ui_NUM_probability": 0.9999546706676483, + "eval_custom_ui_loss": 0.9221082925796509, + "eval_custom_ui_loss_ce": 0.21552801877260208, + "eval_custom_ui_loss_num": 0.152130126953125, + "eval_custom_ui_loss_xval": 0.7607421875, + "eval_custom_ui_runtime": 56.3642, + "eval_custom_ui_samples_per_second": 0.887, + "eval_custom_ui_steps_per_second": 0.035, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.2574062807132494, + "loss": 0.9935225248336792, + "loss_ce": 0.24449913203716278, + "loss_iou": 0.0, + "loss_num": 0.1494140625, + "loss_xval": 0.75, + "num_input_tokens_seen": 182123980, + "step": 2750 + }, + { + "epoch": 0.25749988299714516, + "grad_norm": 11.523423194885254, + "learning_rate": 5e-05, + "loss": 0.9672, + "num_input_tokens_seen": 182188460, + "step": 2751 + }, + { + "epoch": 0.25749988299714516, + "loss": 0.9552120566368103, + "loss_ce": 0.0072140078991651535, + "loss_iou": 0.38671875, + "loss_num": 0.034912109375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 182188460, + "step": 2751 + }, + { + "epoch": 0.25759348528104087, + "grad_norm": 19.382591247558594, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 182254364, + "step": 2752 + }, + { + "epoch": 0.25759348528104087, + "loss": 1.4123663902282715, + "loss_ce": 0.004651571623980999, + "loss_iou": 0.53515625, + "loss_num": 0.06787109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 182254364, + "step": 2752 + }, + { + "epoch": 0.2576870875649366, + "grad_norm": 24.27301788330078, + "learning_rate": 5e-05, + "loss": 1.3711, + "num_input_tokens_seen": 182320480, + "step": 2753 + }, + { + "epoch": 0.2576870875649366, + "loss": 1.2736258506774902, + "loss_ce": 0.0031180973164737225, + "loss_iou": 0.55078125, + "loss_num": 0.034423828125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 182320480, + "step": 2753 + }, + { + "epoch": 0.2577806898488323, + "grad_norm": 23.738521575927734, + "learning_rate": 5e-05, + "loss": 1.4014, + "num_input_tokens_seen": 182386000, + "step": 2754 + }, + { + "epoch": 0.2577806898488323, + "loss": 1.3582301139831543, + "loss_ce": 0.005202867090702057, + "loss_iou": 0.59375, + "loss_num": 0.0322265625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 182386000, + "step": 2754 + }, + { + "epoch": 0.25787429213272806, + "grad_norm": 21.385990142822266, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 182451800, + "step": 2755 + }, + { + "epoch": 0.25787429213272806, + "loss": 1.4563599824905396, + "loss_ce": 0.0061646560207009315, + "loss_iou": 0.63671875, + "loss_num": 0.034912109375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 182451800, + "step": 2755 + }, + { + "epoch": 0.25796789441662377, + "grad_norm": 39.32674026489258, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 182518708, + "step": 2756 + }, + { + "epoch": 0.25796789441662377, + "loss": 1.2763360738754272, + "loss_ce": 0.005828249268233776, + "loss_iou": 0.52734375, + "loss_num": 0.04248046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 182518708, + "step": 2756 + }, + { + "epoch": 0.2580614967005195, + "grad_norm": 22.322223663330078, + "learning_rate": 5e-05, + "loss": 1.6627, + "num_input_tokens_seen": 182585652, + "step": 2757 + }, + { + "epoch": 0.2580614967005195, + "loss": 1.9377355575561523, + "loss_ce": 0.005118353758007288, + "loss_iou": 0.7421875, + "loss_num": 0.08984375, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 182585652, + "step": 2757 + }, + { + "epoch": 0.25815509898441524, + "grad_norm": 15.85788631439209, + "learning_rate": 5e-05, + "loss": 1.1743, + "num_input_tokens_seen": 182653176, + "step": 2758 + }, + { + "epoch": 0.25815509898441524, + "loss": 1.165595293045044, + "loss_ce": 0.005927319638431072, + "loss_iou": 0.466796875, + "loss_num": 0.04443359375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 182653176, + "step": 2758 + }, + { + "epoch": 0.25824870126831095, + "grad_norm": 28.82282066345215, + "learning_rate": 5e-05, + "loss": 1.3383, + "num_input_tokens_seen": 182719236, + "step": 2759 + }, + { + "epoch": 0.25824870126831095, + "loss": 1.2076382637023926, + "loss_ce": 0.003536621108651161, + "loss_iou": 0.4921875, + "loss_num": 0.04345703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 182719236, + "step": 2759 + }, + { + "epoch": 0.25834230355220666, + "grad_norm": 24.307004928588867, + "learning_rate": 5e-05, + "loss": 1.4537, + "num_input_tokens_seen": 182784992, + "step": 2760 + }, + { + "epoch": 0.25834230355220666, + "loss": 1.4684849977493286, + "loss_ce": 0.004373685456812382, + "loss_iou": 0.625, + "loss_num": 0.04296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 182784992, + "step": 2760 + }, + { + "epoch": 0.2584359058361024, + "grad_norm": 20.741451263427734, + "learning_rate": 5e-05, + "loss": 1.3699, + "num_input_tokens_seen": 182851400, + "step": 2761 + }, + { + "epoch": 0.2584359058361024, + "loss": 1.5804247856140137, + "loss_ce": 0.003276292234659195, + "loss_iou": 0.60546875, + "loss_num": 0.0732421875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 182851400, + "step": 2761 + }, + { + "epoch": 0.25852950811999814, + "grad_norm": 25.782926559448242, + "learning_rate": 5e-05, + "loss": 1.4695, + "num_input_tokens_seen": 182917160, + "step": 2762 + }, + { + "epoch": 0.25852950811999814, + "loss": 1.7858836650848389, + "loss_ce": 0.003657030873000622, + "loss_iou": 0.6640625, + "loss_num": 0.0908203125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 182917160, + "step": 2762 + }, + { + "epoch": 0.25862311040389385, + "grad_norm": 17.015609741210938, + "learning_rate": 5e-05, + "loss": 1.2913, + "num_input_tokens_seen": 182984212, + "step": 2763 + }, + { + "epoch": 0.25862311040389385, + "loss": 1.214355707168579, + "loss_ce": 0.003906491212546825, + "loss_iou": 0.5, + "loss_num": 0.04150390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 182984212, + "step": 2763 + }, + { + "epoch": 0.25871671268778956, + "grad_norm": 42.560546875, + "learning_rate": 5e-05, + "loss": 1.4358, + "num_input_tokens_seen": 183050128, + "step": 2764 + }, + { + "epoch": 0.25871671268778956, + "loss": 1.6791510581970215, + "loss_ce": 0.009229252114892006, + "loss_iou": 0.6875, + "loss_num": 0.059326171875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 183050128, + "step": 2764 + }, + { + "epoch": 0.25881031497168533, + "grad_norm": 31.24652862548828, + "learning_rate": 5e-05, + "loss": 1.3654, + "num_input_tokens_seen": 183116300, + "step": 2765 + }, + { + "epoch": 0.25881031497168533, + "loss": 1.324753761291504, + "loss_ce": 0.0024880152195692062, + "loss_iou": 0.58203125, + "loss_num": 0.0322265625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 183116300, + "step": 2765 + }, + { + "epoch": 0.25890391725558104, + "grad_norm": 39.36247634887695, + "learning_rate": 5e-05, + "loss": 1.5224, + "num_input_tokens_seen": 183182028, + "step": 2766 + }, + { + "epoch": 0.25890391725558104, + "loss": 1.3314727544784546, + "loss_ce": 0.0013946664985269308, + "loss_iou": 0.55859375, + "loss_num": 0.041748046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 183182028, + "step": 2766 + }, + { + "epoch": 0.25899751953947675, + "grad_norm": 56.544677734375, + "learning_rate": 5e-05, + "loss": 1.494, + "num_input_tokens_seen": 183247356, + "step": 2767 + }, + { + "epoch": 0.25899751953947675, + "loss": 1.3784685134887695, + "loss_ce": 0.006093028001487255, + "loss_iou": 0.54296875, + "loss_num": 0.056884765625, + "loss_xval": 1.375, + "num_input_tokens_seen": 183247356, + "step": 2767 + }, + { + "epoch": 0.2590911218233725, + "grad_norm": 27.066333770751953, + "learning_rate": 5e-05, + "loss": 1.5124, + "num_input_tokens_seen": 183312848, + "step": 2768 + }, + { + "epoch": 0.2590911218233725, + "loss": 1.449023962020874, + "loss_ce": 0.003711439436301589, + "loss_iou": 0.6015625, + "loss_num": 0.04736328125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 183312848, + "step": 2768 + }, + { + "epoch": 0.2591847241072682, + "grad_norm": 26.6560115814209, + "learning_rate": 5e-05, + "loss": 1.336, + "num_input_tokens_seen": 183378696, + "step": 2769 + }, + { + "epoch": 0.2591847241072682, + "loss": 1.3047513961791992, + "loss_ce": 0.003665025345981121, + "loss_iou": 0.5234375, + "loss_num": 0.051025390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 183378696, + "step": 2769 + }, + { + "epoch": 0.25927832639116394, + "grad_norm": 30.571502685546875, + "learning_rate": 5e-05, + "loss": 1.053, + "num_input_tokens_seen": 183444588, + "step": 2770 + }, + { + "epoch": 0.25927832639116394, + "loss": 1.1597281694412231, + "loss_ce": 0.0010368122020736337, + "loss_iou": 0.515625, + "loss_num": 0.025634765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 183444588, + "step": 2770 + }, + { + "epoch": 0.25937192867505965, + "grad_norm": 30.87347984313965, + "learning_rate": 5e-05, + "loss": 1.4284, + "num_input_tokens_seen": 183511516, + "step": 2771 + }, + { + "epoch": 0.25937192867505965, + "loss": 1.3620257377624512, + "loss_ce": 0.011439764872193336, + "loss_iou": 0.59765625, + "loss_num": 0.030517578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 183511516, + "step": 2771 + }, + { + "epoch": 0.2594655309589554, + "grad_norm": 25.916946411132812, + "learning_rate": 5e-05, + "loss": 1.5079, + "num_input_tokens_seen": 183578912, + "step": 2772 + }, + { + "epoch": 0.2594655309589554, + "loss": 1.4635601043701172, + "loss_ce": 0.0035991547629237175, + "loss_iou": 0.62890625, + "loss_num": 0.039794921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 183578912, + "step": 2772 + }, + { + "epoch": 0.2595591332428511, + "grad_norm": 19.248422622680664, + "learning_rate": 5e-05, + "loss": 1.3325, + "num_input_tokens_seen": 183645392, + "step": 2773 + }, + { + "epoch": 0.2595591332428511, + "loss": 1.1251640319824219, + "loss_ce": 0.005046752281486988, + "loss_iou": 0.498046875, + "loss_num": 0.025390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 183645392, + "step": 2773 + }, + { + "epoch": 0.25965273552674684, + "grad_norm": 32.65757369995117, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 183712312, + "step": 2774 + }, + { + "epoch": 0.25965273552674684, + "loss": 1.0271244049072266, + "loss_ce": 0.0031376397237181664, + "loss_iou": 0.416015625, + "loss_num": 0.03857421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 183712312, + "step": 2774 + }, + { + "epoch": 0.2597463378106426, + "grad_norm": 25.546913146972656, + "learning_rate": 5e-05, + "loss": 1.5667, + "num_input_tokens_seen": 183778704, + "step": 2775 + }, + { + "epoch": 0.2597463378106426, + "loss": 1.499668002128601, + "loss_ce": 0.0030860367696732283, + "loss_iou": 0.6484375, + "loss_num": 0.040283203125, + "loss_xval": 1.5, + "num_input_tokens_seen": 183778704, + "step": 2775 + }, + { + "epoch": 0.2598399400945383, + "grad_norm": 22.19900894165039, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 183844476, + "step": 2776 + }, + { + "epoch": 0.2598399400945383, + "loss": 1.196016550064087, + "loss_ce": 0.010469729080796242, + "loss_iou": 0.50390625, + "loss_num": 0.0361328125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 183844476, + "step": 2776 + }, + { + "epoch": 0.259933542378434, + "grad_norm": 26.540695190429688, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 183911088, + "step": 2777 + }, + { + "epoch": 0.259933542378434, + "loss": 1.228979468345642, + "loss_ce": 0.004858322907239199, + "loss_iou": 0.51953125, + "loss_num": 0.036376953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 183911088, + "step": 2777 + }, + { + "epoch": 0.26002714466232973, + "grad_norm": 19.824424743652344, + "learning_rate": 5e-05, + "loss": 1.6691, + "num_input_tokens_seen": 183974276, + "step": 2778 + }, + { + "epoch": 0.26002714466232973, + "loss": 1.5102863311767578, + "loss_ce": 0.005037275142967701, + "loss_iou": 0.64453125, + "loss_num": 0.0439453125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 183974276, + "step": 2778 + }, + { + "epoch": 0.2601207469462255, + "grad_norm": 17.756515502929688, + "learning_rate": 5e-05, + "loss": 1.3528, + "num_input_tokens_seen": 184040060, + "step": 2779 + }, + { + "epoch": 0.2601207469462255, + "loss": 1.3978538513183594, + "loss_ce": 0.009182073175907135, + "loss_iou": 0.5234375, + "loss_num": 0.06884765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 184040060, + "step": 2779 + }, + { + "epoch": 0.2602143492301212, + "grad_norm": 26.4755802154541, + "learning_rate": 5e-05, + "loss": 1.2616, + "num_input_tokens_seen": 184106404, + "step": 2780 + }, + { + "epoch": 0.2602143492301212, + "loss": 1.2944135665893555, + "loss_ce": 0.008280863054096699, + "loss_iou": 0.5390625, + "loss_num": 0.041259765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 184106404, + "step": 2780 + }, + { + "epoch": 0.2603079515140169, + "grad_norm": 22.432933807373047, + "learning_rate": 5e-05, + "loss": 1.4327, + "num_input_tokens_seen": 184172288, + "step": 2781 + }, + { + "epoch": 0.2603079515140169, + "loss": 1.4781384468078613, + "loss_ce": 0.004505617544054985, + "loss_iou": 0.609375, + "loss_num": 0.05078125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 184172288, + "step": 2781 + }, + { + "epoch": 0.2604015537979127, + "grad_norm": 19.73869514465332, + "learning_rate": 5e-05, + "loss": 1.1615, + "num_input_tokens_seen": 184238320, + "step": 2782 + }, + { + "epoch": 0.2604015537979127, + "loss": 1.0642637014389038, + "loss_ce": 0.004571388475596905, + "loss_iou": 0.419921875, + "loss_num": 0.044189453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 184238320, + "step": 2782 + }, + { + "epoch": 0.2604951560818084, + "grad_norm": 31.140819549560547, + "learning_rate": 5e-05, + "loss": 1.4513, + "num_input_tokens_seen": 184305112, + "step": 2783 + }, + { + "epoch": 0.2604951560818084, + "loss": 1.5943620204925537, + "loss_ce": 0.0054949019104242325, + "loss_iou": 0.65625, + "loss_num": 0.05517578125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 184305112, + "step": 2783 + }, + { + "epoch": 0.2605887583657041, + "grad_norm": 80.94667053222656, + "learning_rate": 5e-05, + "loss": 1.1728, + "num_input_tokens_seen": 184371380, + "step": 2784 + }, + { + "epoch": 0.2605887583657041, + "loss": 1.103699803352356, + "loss_ce": 0.005067044869065285, + "loss_iou": 0.455078125, + "loss_num": 0.037353515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 184371380, + "step": 2784 + }, + { + "epoch": 0.2606823606495999, + "grad_norm": 25.863245010375977, + "learning_rate": 5e-05, + "loss": 1.5235, + "num_input_tokens_seen": 184437636, + "step": 2785 + }, + { + "epoch": 0.2606823606495999, + "loss": 1.53908109664917, + "loss_ce": 0.01027250662446022, + "loss_iou": 0.6328125, + "loss_num": 0.052734375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 184437636, + "step": 2785 + }, + { + "epoch": 0.2607759629334956, + "grad_norm": 10.690145492553711, + "learning_rate": 5e-05, + "loss": 1.1675, + "num_input_tokens_seen": 184502828, + "step": 2786 + }, + { + "epoch": 0.2607759629334956, + "loss": 1.0218708515167236, + "loss_ce": 0.0071919020265340805, + "loss_iou": 0.4375, + "loss_num": 0.0277099609375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 184502828, + "step": 2786 + }, + { + "epoch": 0.2608695652173913, + "grad_norm": 13.819395065307617, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 184569736, + "step": 2787 + }, + { + "epoch": 0.2608695652173913, + "loss": 1.0895116329193115, + "loss_ce": 0.007480449974536896, + "loss_iou": 0.4609375, + "loss_num": 0.031982421875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 184569736, + "step": 2787 + }, + { + "epoch": 0.260963167501287, + "grad_norm": 22.60026741027832, + "learning_rate": 5e-05, + "loss": 1.3177, + "num_input_tokens_seen": 184635668, + "step": 2788 + }, + { + "epoch": 0.260963167501287, + "loss": 1.3702020645141602, + "loss_ce": 0.0054559423588216305, + "loss_iou": 0.55078125, + "loss_num": 0.052001953125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 184635668, + "step": 2788 + }, + { + "epoch": 0.2610567697851828, + "grad_norm": 30.772146224975586, + "learning_rate": 5e-05, + "loss": 1.313, + "num_input_tokens_seen": 184702996, + "step": 2789 + }, + { + "epoch": 0.2610567697851828, + "loss": 1.282770037651062, + "loss_ce": 0.004937991499900818, + "loss_iou": 0.5546875, + "loss_num": 0.0341796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 184702996, + "step": 2789 + }, + { + "epoch": 0.2611503720690785, + "grad_norm": 21.066852569580078, + "learning_rate": 5e-05, + "loss": 1.708, + "num_input_tokens_seen": 184769136, + "step": 2790 + }, + { + "epoch": 0.2611503720690785, + "loss": 1.485233187675476, + "loss_ce": 0.0057409899309277534, + "loss_iou": 0.6328125, + "loss_num": 0.042236328125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 184769136, + "step": 2790 + }, + { + "epoch": 0.2612439743529742, + "grad_norm": 23.4888973236084, + "learning_rate": 5e-05, + "loss": 1.2869, + "num_input_tokens_seen": 184836248, + "step": 2791 + }, + { + "epoch": 0.2612439743529742, + "loss": 1.3349745273590088, + "loss_ce": 0.002943230327218771, + "loss_iou": 0.546875, + "loss_num": 0.047607421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 184836248, + "step": 2791 + }, + { + "epoch": 0.26133757663686996, + "grad_norm": 23.718120574951172, + "learning_rate": 5e-05, + "loss": 1.6147, + "num_input_tokens_seen": 184903464, + "step": 2792 + }, + { + "epoch": 0.26133757663686996, + "loss": 1.5826460123062134, + "loss_ce": 0.003544393926858902, + "loss_iou": 0.66796875, + "loss_num": 0.048828125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 184903464, + "step": 2792 + }, + { + "epoch": 0.26143117892076567, + "grad_norm": 20.823057174682617, + "learning_rate": 5e-05, + "loss": 1.4117, + "num_input_tokens_seen": 184970044, + "step": 2793 + }, + { + "epoch": 0.26143117892076567, + "loss": 1.2039194107055664, + "loss_ce": 0.004212336614727974, + "loss_iou": 0.486328125, + "loss_num": 0.045166015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 184970044, + "step": 2793 + }, + { + "epoch": 0.2615247812046614, + "grad_norm": 36.8116340637207, + "learning_rate": 5e-05, + "loss": 1.4354, + "num_input_tokens_seen": 185035952, + "step": 2794 + }, + { + "epoch": 0.2615247812046614, + "loss": 1.3722023963928223, + "loss_ce": 0.0069679622538387775, + "loss_iou": 0.55859375, + "loss_num": 0.0498046875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 185035952, + "step": 2794 + }, + { + "epoch": 0.2616183834885571, + "grad_norm": 24.805438995361328, + "learning_rate": 5e-05, + "loss": 1.5389, + "num_input_tokens_seen": 185103444, + "step": 2795 + }, + { + "epoch": 0.2616183834885571, + "loss": 1.482750654220581, + "loss_ce": 0.004235108382999897, + "loss_iou": 0.61328125, + "loss_num": 0.049560546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 185103444, + "step": 2795 + }, + { + "epoch": 0.26171198577245286, + "grad_norm": 32.0396614074707, + "learning_rate": 5e-05, + "loss": 1.4879, + "num_input_tokens_seen": 185169992, + "step": 2796 + }, + { + "epoch": 0.26171198577245286, + "loss": 1.4500646591186523, + "loss_ce": 0.005728684365749359, + "loss_iou": 0.640625, + "loss_num": 0.0322265625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 185169992, + "step": 2796 + }, + { + "epoch": 0.26180558805634857, + "grad_norm": 30.2569637298584, + "learning_rate": 5e-05, + "loss": 1.5313, + "num_input_tokens_seen": 185236976, + "step": 2797 + }, + { + "epoch": 0.26180558805634857, + "loss": 1.5174522399902344, + "loss_ce": 0.00719832070171833, + "loss_iou": 0.6484375, + "loss_num": 0.042236328125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 185236976, + "step": 2797 + }, + { + "epoch": 0.2618991903402443, + "grad_norm": 21.909534454345703, + "learning_rate": 5e-05, + "loss": 1.3901, + "num_input_tokens_seen": 185302904, + "step": 2798 + }, + { + "epoch": 0.2618991903402443, + "loss": 1.467483639717102, + "loss_ce": 0.005081304349005222, + "loss_iou": 0.55859375, + "loss_num": 0.068359375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 185302904, + "step": 2798 + }, + { + "epoch": 0.26199279262414005, + "grad_norm": 21.550695419311523, + "learning_rate": 5e-05, + "loss": 1.1799, + "num_input_tokens_seen": 185369352, + "step": 2799 + }, + { + "epoch": 0.26199279262414005, + "loss": 1.1303417682647705, + "loss_ce": 0.0038770241662859917, + "loss_iou": 0.486328125, + "loss_num": 0.0308837890625, + "loss_xval": 1.125, + "num_input_tokens_seen": 185369352, + "step": 2799 + }, + { + "epoch": 0.26208639490803576, + "grad_norm": 18.965072631835938, + "learning_rate": 5e-05, + "loss": 1.3469, + "num_input_tokens_seen": 185433792, + "step": 2800 + }, + { + "epoch": 0.26208639490803576, + "loss": 1.271073341369629, + "loss_ce": 0.003495211945846677, + "loss_iou": 0.53515625, + "loss_num": 0.039794921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 185433792, + "step": 2800 + }, + { + "epoch": 0.26217999719193147, + "grad_norm": 14.17694091796875, + "learning_rate": 5e-05, + "loss": 1.3766, + "num_input_tokens_seen": 185500448, + "step": 2801 + }, + { + "epoch": 0.26217999719193147, + "loss": 1.3449500799179077, + "loss_ce": 0.0065711867064237595, + "loss_iou": 0.5625, + "loss_num": 0.0419921875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 185500448, + "step": 2801 + }, + { + "epoch": 0.26227359947582723, + "grad_norm": 21.094636917114258, + "learning_rate": 5e-05, + "loss": 1.1819, + "num_input_tokens_seen": 185566104, + "step": 2802 + }, + { + "epoch": 0.26227359947582723, + "loss": 1.209665060043335, + "loss_ce": 0.0053192367777228355, + "loss_iou": 0.5, + "loss_num": 0.041259765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 185566104, + "step": 2802 + }, + { + "epoch": 0.26236720175972295, + "grad_norm": 35.77684020996094, + "learning_rate": 5e-05, + "loss": 1.611, + "num_input_tokens_seen": 185631140, + "step": 2803 + }, + { + "epoch": 0.26236720175972295, + "loss": 1.7878296375274658, + "loss_ce": 0.006579712964594364, + "loss_iou": 0.74609375, + "loss_num": 0.0576171875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 185631140, + "step": 2803 + }, + { + "epoch": 0.26246080404361866, + "grad_norm": 22.20318031311035, + "learning_rate": 5e-05, + "loss": 1.5298, + "num_input_tokens_seen": 185697824, + "step": 2804 + }, + { + "epoch": 0.26246080404361866, + "loss": 1.627734661102295, + "loss_ce": 0.00859402772039175, + "loss_iou": 0.66796875, + "loss_num": 0.05712890625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 185697824, + "step": 2804 + }, + { + "epoch": 0.26255440632751437, + "grad_norm": 31.52370834350586, + "learning_rate": 5e-05, + "loss": 1.2775, + "num_input_tokens_seen": 185763704, + "step": 2805 + }, + { + "epoch": 0.26255440632751437, + "loss": 1.156463384628296, + "loss_ce": 0.003936439752578735, + "loss_iou": 0.482421875, + "loss_num": 0.037353515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 185763704, + "step": 2805 + }, + { + "epoch": 0.26264800861141013, + "grad_norm": 34.81428527832031, + "learning_rate": 5e-05, + "loss": 1.4791, + "num_input_tokens_seen": 185830772, + "step": 2806 + }, + { + "epoch": 0.26264800861141013, + "loss": 1.4220342636108398, + "loss_ce": 0.004553778562694788, + "loss_iou": 0.5859375, + "loss_num": 0.0498046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 185830772, + "step": 2806 + }, + { + "epoch": 0.26274161089530584, + "grad_norm": 23.893314361572266, + "learning_rate": 5e-05, + "loss": 1.1632, + "num_input_tokens_seen": 185897716, + "step": 2807 + }, + { + "epoch": 0.26274161089530584, + "loss": 1.2179107666015625, + "loss_ce": 0.003066919045522809, + "loss_iou": 0.5546875, + "loss_num": 0.020751953125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 185897716, + "step": 2807 + }, + { + "epoch": 0.26283521317920155, + "grad_norm": 13.81638240814209, + "learning_rate": 5e-05, + "loss": 1.3039, + "num_input_tokens_seen": 185962712, + "step": 2808 + }, + { + "epoch": 0.26283521317920155, + "loss": 1.4558379650115967, + "loss_ce": 0.011990239843726158, + "loss_iou": 0.56640625, + "loss_num": 0.06201171875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 185962712, + "step": 2808 + }, + { + "epoch": 0.2629288154630973, + "grad_norm": 65.97924041748047, + "learning_rate": 5e-05, + "loss": 1.4595, + "num_input_tokens_seen": 186029268, + "step": 2809 + }, + { + "epoch": 0.2629288154630973, + "loss": 1.5803302526474, + "loss_ce": 0.005623304285109043, + "loss_iou": 0.62890625, + "loss_num": 0.06396484375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 186029268, + "step": 2809 + }, + { + "epoch": 0.26302241774699303, + "grad_norm": 26.90452766418457, + "learning_rate": 5e-05, + "loss": 1.2901, + "num_input_tokens_seen": 186095640, + "step": 2810 + }, + { + "epoch": 0.26302241774699303, + "loss": 1.1605780124664307, + "loss_ce": 0.004038172774016857, + "loss_iou": 0.498046875, + "loss_num": 0.0322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 186095640, + "step": 2810 + }, + { + "epoch": 0.26311602003088874, + "grad_norm": 42.861717224121094, + "learning_rate": 5e-05, + "loss": 1.3228, + "num_input_tokens_seen": 186161412, + "step": 2811 + }, + { + "epoch": 0.26311602003088874, + "loss": 1.1959176063537598, + "loss_ce": 0.0030464723240584135, + "loss_iou": 0.474609375, + "loss_num": 0.048583984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 186161412, + "step": 2811 + }, + { + "epoch": 0.2632096223147845, + "grad_norm": 26.436466217041016, + "learning_rate": 5e-05, + "loss": 1.3872, + "num_input_tokens_seen": 186227896, + "step": 2812 + }, + { + "epoch": 0.2632096223147845, + "loss": 1.3306233882904053, + "loss_ce": 0.00835779495537281, + "loss_iou": 0.53125, + "loss_num": 0.05224609375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 186227896, + "step": 2812 + }, + { + "epoch": 0.2633032245986802, + "grad_norm": 26.288841247558594, + "learning_rate": 5e-05, + "loss": 1.6034, + "num_input_tokens_seen": 186293408, + "step": 2813 + }, + { + "epoch": 0.2633032245986802, + "loss": 1.5791151523590088, + "loss_ce": 0.003919846843928099, + "loss_iou": 0.625, + "loss_num": 0.0654296875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 186293408, + "step": 2813 + }, + { + "epoch": 0.26339682688257593, + "grad_norm": 18.455432891845703, + "learning_rate": 5e-05, + "loss": 1.313, + "num_input_tokens_seen": 186359436, + "step": 2814 + }, + { + "epoch": 0.26339682688257593, + "loss": 1.1406774520874023, + "loss_ce": 0.002493802923709154, + "loss_iou": 0.49609375, + "loss_num": 0.029052734375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 186359436, + "step": 2814 + }, + { + "epoch": 0.26349042916647164, + "grad_norm": 22.164289474487305, + "learning_rate": 5e-05, + "loss": 1.6511, + "num_input_tokens_seen": 186426008, + "step": 2815 + }, + { + "epoch": 0.26349042916647164, + "loss": 1.6130539178848267, + "loss_ce": 0.006364461965858936, + "loss_iou": 0.6171875, + "loss_num": 0.0751953125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 186426008, + "step": 2815 + }, + { + "epoch": 0.2635840314503674, + "grad_norm": 23.62710189819336, + "learning_rate": 5e-05, + "loss": 1.2416, + "num_input_tokens_seen": 186492532, + "step": 2816 + }, + { + "epoch": 0.2635840314503674, + "loss": 1.4303010702133179, + "loss_ce": 0.0054963938891887665, + "loss_iou": 0.5703125, + "loss_num": 0.0576171875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 186492532, + "step": 2816 + }, + { + "epoch": 0.2636776337342631, + "grad_norm": 28.17098617553711, + "learning_rate": 5e-05, + "loss": 1.4781, + "num_input_tokens_seen": 186559828, + "step": 2817 + }, + { + "epoch": 0.2636776337342631, + "loss": 1.3736687898635864, + "loss_ce": 0.004039898049086332, + "loss_iou": 0.5859375, + "loss_num": 0.03955078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 186559828, + "step": 2817 + }, + { + "epoch": 0.26377123601815883, + "grad_norm": 38.10930252075195, + "learning_rate": 5e-05, + "loss": 1.3573, + "num_input_tokens_seen": 186625564, + "step": 2818 + }, + { + "epoch": 0.26377123601815883, + "loss": 1.46852445602417, + "loss_ce": 0.003680661553516984, + "loss_iou": 0.5859375, + "loss_num": 0.058837890625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 186625564, + "step": 2818 + }, + { + "epoch": 0.2638648383020546, + "grad_norm": 21.143159866333008, + "learning_rate": 5e-05, + "loss": 1.6812, + "num_input_tokens_seen": 186691476, + "step": 2819 + }, + { + "epoch": 0.2638648383020546, + "loss": 1.597029447555542, + "loss_ce": 0.004744234029203653, + "loss_iou": 0.6953125, + "loss_num": 0.039794921875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 186691476, + "step": 2819 + }, + { + "epoch": 0.2639584405859503, + "grad_norm": 20.390413284301758, + "learning_rate": 5e-05, + "loss": 1.4927, + "num_input_tokens_seen": 186757432, + "step": 2820 + }, + { + "epoch": 0.2639584405859503, + "loss": 1.5646171569824219, + "loss_ce": 0.004558563232421875, + "loss_iou": 0.6328125, + "loss_num": 0.057861328125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 186757432, + "step": 2820 + }, + { + "epoch": 0.264052042869846, + "grad_norm": 30.135969161987305, + "learning_rate": 5e-05, + "loss": 1.3072, + "num_input_tokens_seen": 186823556, + "step": 2821 + }, + { + "epoch": 0.264052042869846, + "loss": 1.3670326471328735, + "loss_ce": 0.009122512303292751, + "loss_iou": 0.54296875, + "loss_num": 0.054443359375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 186823556, + "step": 2821 + }, + { + "epoch": 0.2641456451537417, + "grad_norm": 23.44801139831543, + "learning_rate": 5e-05, + "loss": 1.393, + "num_input_tokens_seen": 186889228, + "step": 2822 + }, + { + "epoch": 0.2641456451537417, + "loss": 1.400672197341919, + "loss_ce": 0.0063850851729512215, + "loss_iou": 0.5703125, + "loss_num": 0.05029296875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 186889228, + "step": 2822 + }, + { + "epoch": 0.2642392474376375, + "grad_norm": 24.048946380615234, + "learning_rate": 5e-05, + "loss": 1.4017, + "num_input_tokens_seen": 186956736, + "step": 2823 + }, + { + "epoch": 0.2642392474376375, + "loss": 1.4926737546920776, + "loss_ce": 0.004392510745674372, + "loss_iou": 0.625, + "loss_num": 0.047607421875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 186956736, + "step": 2823 + }, + { + "epoch": 0.2643328497215332, + "grad_norm": 20.472166061401367, + "learning_rate": 5e-05, + "loss": 1.5982, + "num_input_tokens_seen": 187022132, + "step": 2824 + }, + { + "epoch": 0.2643328497215332, + "loss": 1.6638191938400269, + "loss_ce": 0.00561615452170372, + "loss_iou": 0.69140625, + "loss_num": 0.054931640625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 187022132, + "step": 2824 + }, + { + "epoch": 0.2644264520054289, + "grad_norm": 17.666561126708984, + "learning_rate": 5e-05, + "loss": 1.0993, + "num_input_tokens_seen": 187089604, + "step": 2825 + }, + { + "epoch": 0.2644264520054289, + "loss": 1.113770604133606, + "loss_ce": 0.005860441364347935, + "loss_iou": 0.46484375, + "loss_num": 0.03564453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 187089604, + "step": 2825 + }, + { + "epoch": 0.2645200542893247, + "grad_norm": 20.389936447143555, + "learning_rate": 5e-05, + "loss": 1.4501, + "num_input_tokens_seen": 187155168, + "step": 2826 + }, + { + "epoch": 0.2645200542893247, + "loss": 1.4092261791229248, + "loss_ce": 0.004929243586957455, + "loss_iou": 0.578125, + "loss_num": 0.05029296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 187155168, + "step": 2826 + }, + { + "epoch": 0.2646136565732204, + "grad_norm": 27.508337020874023, + "learning_rate": 5e-05, + "loss": 1.3513, + "num_input_tokens_seen": 187222568, + "step": 2827 + }, + { + "epoch": 0.2646136565732204, + "loss": 1.421954870223999, + "loss_ce": 0.0069158636033535, + "loss_iou": 0.578125, + "loss_num": 0.0517578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 187222568, + "step": 2827 + }, + { + "epoch": 0.2647072588571161, + "grad_norm": 25.480224609375, + "learning_rate": 5e-05, + "loss": 1.5858, + "num_input_tokens_seen": 187288552, + "step": 2828 + }, + { + "epoch": 0.2647072588571161, + "loss": 1.385055422782898, + "loss_ce": 0.0032194985542446375, + "loss_iou": 0.59375, + "loss_num": 0.039306640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 187288552, + "step": 2828 + }, + { + "epoch": 0.26480086114101187, + "grad_norm": 18.64525604248047, + "learning_rate": 5e-05, + "loss": 1.0948, + "num_input_tokens_seen": 187354668, + "step": 2829 + }, + { + "epoch": 0.26480086114101187, + "loss": 1.1075193881988525, + "loss_ce": 0.004980382043868303, + "loss_iou": 0.458984375, + "loss_num": 0.03759765625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 187354668, + "step": 2829 + }, + { + "epoch": 0.2648944634249076, + "grad_norm": 19.179067611694336, + "learning_rate": 5e-05, + "loss": 1.3061, + "num_input_tokens_seen": 187420132, + "step": 2830 + }, + { + "epoch": 0.2648944634249076, + "loss": 1.288672685623169, + "loss_ce": 0.0030281427316367626, + "loss_iou": 0.53515625, + "loss_num": 0.04345703125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 187420132, + "step": 2830 + }, + { + "epoch": 0.2649880657088033, + "grad_norm": 32.32212829589844, + "learning_rate": 5e-05, + "loss": 1.2404, + "num_input_tokens_seen": 187485640, + "step": 2831 + }, + { + "epoch": 0.2649880657088033, + "loss": 1.1736130714416504, + "loss_ce": 0.003447064198553562, + "loss_iou": 0.50390625, + "loss_num": 0.033203125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 187485640, + "step": 2831 + }, + { + "epoch": 0.265081667992699, + "grad_norm": 20.428314208984375, + "learning_rate": 5e-05, + "loss": 1.5183, + "num_input_tokens_seen": 187552516, + "step": 2832 + }, + { + "epoch": 0.265081667992699, + "loss": 1.547531247138977, + "loss_ce": 0.00749216740950942, + "loss_iou": 0.6484375, + "loss_num": 0.048583984375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 187552516, + "step": 2832 + }, + { + "epoch": 0.26517527027659477, + "grad_norm": 18.128150939941406, + "learning_rate": 5e-05, + "loss": 1.4426, + "num_input_tokens_seen": 187618952, + "step": 2833 + }, + { + "epoch": 0.26517527027659477, + "loss": 1.3896745443344116, + "loss_ce": 0.0039323908276855946, + "loss_iou": 0.5625, + "loss_num": 0.051025390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 187618952, + "step": 2833 + }, + { + "epoch": 0.2652688725604905, + "grad_norm": 21.625513076782227, + "learning_rate": 5e-05, + "loss": 1.3989, + "num_input_tokens_seen": 187684672, + "step": 2834 + }, + { + "epoch": 0.2652688725604905, + "loss": 1.372078537940979, + "loss_ce": 0.003426195355132222, + "loss_iou": 0.55078125, + "loss_num": 0.052734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 187684672, + "step": 2834 + }, + { + "epoch": 0.2653624748443862, + "grad_norm": 66.1303482055664, + "learning_rate": 5e-05, + "loss": 1.4198, + "num_input_tokens_seen": 187752044, + "step": 2835 + }, + { + "epoch": 0.2653624748443862, + "loss": 1.0787060260772705, + "loss_ce": 0.006684429943561554, + "loss_iou": 0.470703125, + "loss_num": 0.02587890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 187752044, + "step": 2835 + }, + { + "epoch": 0.26545607712828195, + "grad_norm": 24.83431625366211, + "learning_rate": 5e-05, + "loss": 1.4151, + "num_input_tokens_seen": 187818192, + "step": 2836 + }, + { + "epoch": 0.26545607712828195, + "loss": 1.3280391693115234, + "loss_ce": 0.004796979017555714, + "loss_iou": 0.55078125, + "loss_num": 0.044677734375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 187818192, + "step": 2836 + }, + { + "epoch": 0.26554967941217766, + "grad_norm": 29.779151916503906, + "learning_rate": 5e-05, + "loss": 1.5633, + "num_input_tokens_seen": 187884536, + "step": 2837 + }, + { + "epoch": 0.26554967941217766, + "loss": 1.3751624822616577, + "loss_ce": 0.0026038698852062225, + "loss_iou": 0.58984375, + "loss_num": 0.03857421875, + "loss_xval": 1.375, + "num_input_tokens_seen": 187884536, + "step": 2837 + }, + { + "epoch": 0.2656432816960734, + "grad_norm": 19.289663314819336, + "learning_rate": 5e-05, + "loss": 1.2095, + "num_input_tokens_seen": 187951608, + "step": 2838 + }, + { + "epoch": 0.2656432816960734, + "loss": 1.2762291431427002, + "loss_ce": 0.004256377462297678, + "loss_iou": 0.50390625, + "loss_num": 0.052978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 187951608, + "step": 2838 + }, + { + "epoch": 0.2657368839799691, + "grad_norm": 20.678348541259766, + "learning_rate": 5e-05, + "loss": 1.4225, + "num_input_tokens_seen": 188016236, + "step": 2839 + }, + { + "epoch": 0.2657368839799691, + "loss": 1.25014066696167, + "loss_ce": 0.0045352717861533165, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 188016236, + "step": 2839 + }, + { + "epoch": 0.26583048626386485, + "grad_norm": 36.310726165771484, + "learning_rate": 5e-05, + "loss": 1.3749, + "num_input_tokens_seen": 188082668, + "step": 2840 + }, + { + "epoch": 0.26583048626386485, + "loss": 1.3680238723754883, + "loss_ce": 0.005230925511568785, + "loss_iou": 0.56640625, + "loss_num": 0.046142578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 188082668, + "step": 2840 + }, + { + "epoch": 0.26592408854776056, + "grad_norm": 99.97769927978516, + "learning_rate": 5e-05, + "loss": 1.4957, + "num_input_tokens_seen": 188148920, + "step": 2841 + }, + { + "epoch": 0.26592408854776056, + "loss": 1.4386107921600342, + "loss_ce": 0.006481905467808247, + "loss_iou": 0.59375, + "loss_num": 0.048095703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 188148920, + "step": 2841 + }, + { + "epoch": 0.2660176908316563, + "grad_norm": 34.94300079345703, + "learning_rate": 5e-05, + "loss": 1.1268, + "num_input_tokens_seen": 188215264, + "step": 2842 + }, + { + "epoch": 0.2660176908316563, + "loss": 1.1796425580978394, + "loss_ce": 0.009232343174517155, + "loss_iou": 0.41015625, + "loss_num": 0.06982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 188215264, + "step": 2842 + }, + { + "epoch": 0.26611129311555204, + "grad_norm": 18.755163192749023, + "learning_rate": 5e-05, + "loss": 1.088, + "num_input_tokens_seen": 188281200, + "step": 2843 + }, + { + "epoch": 0.26611129311555204, + "loss": 0.9051686525344849, + "loss_ce": 0.003557295072823763, + "loss_iou": 0.412109375, + "loss_num": 0.015625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 188281200, + "step": 2843 + }, + { + "epoch": 0.26620489539944775, + "grad_norm": 40.20686721801758, + "learning_rate": 5e-05, + "loss": 1.3989, + "num_input_tokens_seen": 188347904, + "step": 2844 + }, + { + "epoch": 0.26620489539944775, + "loss": 1.5748178958892822, + "loss_ce": 0.008899862878024578, + "loss_iou": 0.625, + "loss_num": 0.0625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 188347904, + "step": 2844 + }, + { + "epoch": 0.26629849768334346, + "grad_norm": 28.67582130432129, + "learning_rate": 5e-05, + "loss": 1.2557, + "num_input_tokens_seen": 188413588, + "step": 2845 + }, + { + "epoch": 0.26629849768334346, + "loss": 1.5271004438400269, + "loss_ce": 0.013428614474833012, + "loss_iou": 0.61328125, + "loss_num": 0.0576171875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 188413588, + "step": 2845 + }, + { + "epoch": 0.2663920999672392, + "grad_norm": 24.36979103088379, + "learning_rate": 5e-05, + "loss": 1.0983, + "num_input_tokens_seen": 188479768, + "step": 2846 + }, + { + "epoch": 0.2663920999672392, + "loss": 1.0076212882995605, + "loss_ce": 0.005423974245786667, + "loss_iou": 0.435546875, + "loss_num": 0.0260009765625, + "loss_xval": 1.0, + "num_input_tokens_seen": 188479768, + "step": 2846 + }, + { + "epoch": 0.26648570225113494, + "grad_norm": 20.857608795166016, + "learning_rate": 5e-05, + "loss": 1.4772, + "num_input_tokens_seen": 188546268, + "step": 2847 + }, + { + "epoch": 0.26648570225113494, + "loss": 1.3241288661956787, + "loss_ce": 0.004792914725840092, + "loss_iou": 0.53515625, + "loss_num": 0.04931640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 188546268, + "step": 2847 + }, + { + "epoch": 0.26657930453503065, + "grad_norm": 21.56261444091797, + "learning_rate": 5e-05, + "loss": 1.242, + "num_input_tokens_seen": 188611944, + "step": 2848 + }, + { + "epoch": 0.26657930453503065, + "loss": 1.2294247150421143, + "loss_ce": 0.007012532092630863, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 188611944, + "step": 2848 + }, + { + "epoch": 0.26667290681892636, + "grad_norm": 18.722837448120117, + "learning_rate": 5e-05, + "loss": 1.3164, + "num_input_tokens_seen": 188679540, + "step": 2849 + }, + { + "epoch": 0.26667290681892636, + "loss": 1.3445260524749756, + "loss_ce": 0.0032175127416849136, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 188679540, + "step": 2849 + }, + { + "epoch": 0.2667665091028221, + "grad_norm": 25.41478729248047, + "learning_rate": 5e-05, + "loss": 1.7732, + "num_input_tokens_seen": 188746728, + "step": 2850 + }, + { + "epoch": 0.2667665091028221, + "loss": 1.7352827787399292, + "loss_ce": 0.010673293843865395, + "loss_iou": 0.6953125, + "loss_num": 0.06689453125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 188746728, + "step": 2850 + }, + { + "epoch": 0.26686011138671784, + "grad_norm": 47.63404083251953, + "learning_rate": 5e-05, + "loss": 1.7756, + "num_input_tokens_seen": 188813028, + "step": 2851 + }, + { + "epoch": 0.26686011138671784, + "loss": 1.6079998016357422, + "loss_ce": 0.004484123550355434, + "loss_iou": 0.69140625, + "loss_num": 0.043701171875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 188813028, + "step": 2851 + }, + { + "epoch": 0.26695371367061355, + "grad_norm": 27.597810745239258, + "learning_rate": 5e-05, + "loss": 1.6244, + "num_input_tokens_seen": 188879340, + "step": 2852 + }, + { + "epoch": 0.26695371367061355, + "loss": 1.4877656698226929, + "loss_ce": 0.013156229630112648, + "loss_iou": 0.59375, + "loss_num": 0.05810546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 188879340, + "step": 2852 + }, + { + "epoch": 0.2670473159545093, + "grad_norm": 20.776538848876953, + "learning_rate": 5e-05, + "loss": 1.1581, + "num_input_tokens_seen": 188945600, + "step": 2853 + }, + { + "epoch": 0.2670473159545093, + "loss": 1.2930817604064941, + "loss_ce": 0.0030426857993006706, + "loss_iou": 0.57421875, + "loss_num": 0.0286865234375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 188945600, + "step": 2853 + }, + { + "epoch": 0.267140918238405, + "grad_norm": 32.74734115600586, + "learning_rate": 5e-05, + "loss": 1.3094, + "num_input_tokens_seen": 189011012, + "step": 2854 + }, + { + "epoch": 0.267140918238405, + "loss": 1.3553078174591064, + "loss_ce": 0.003257071366533637, + "loss_iou": 0.58203125, + "loss_num": 0.037841796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 189011012, + "step": 2854 + }, + { + "epoch": 0.26723452052230073, + "grad_norm": 24.080137252807617, + "learning_rate": 5e-05, + "loss": 1.308, + "num_input_tokens_seen": 189077788, + "step": 2855 + }, + { + "epoch": 0.26723452052230073, + "loss": 1.4663535356521606, + "loss_ce": 0.005416055675595999, + "loss_iou": 0.59765625, + "loss_num": 0.05322265625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 189077788, + "step": 2855 + }, + { + "epoch": 0.26732812280619644, + "grad_norm": 24.878042221069336, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 189145404, + "step": 2856 + }, + { + "epoch": 0.26732812280619644, + "loss": 1.4070184230804443, + "loss_ce": 0.004674690775573254, + "loss_iou": 0.6015625, + "loss_num": 0.039306640625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 189145404, + "step": 2856 + }, + { + "epoch": 0.2674217250900922, + "grad_norm": 43.244544982910156, + "learning_rate": 5e-05, + "loss": 1.3105, + "num_input_tokens_seen": 189211512, + "step": 2857 + }, + { + "epoch": 0.2674217250900922, + "loss": 1.3713688850402832, + "loss_ce": 0.002716538030654192, + "loss_iou": 0.59765625, + "loss_num": 0.03466796875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 189211512, + "step": 2857 + }, + { + "epoch": 0.2675153273739879, + "grad_norm": 33.19589614868164, + "learning_rate": 5e-05, + "loss": 1.2343, + "num_input_tokens_seen": 189277824, + "step": 2858 + }, + { + "epoch": 0.2675153273739879, + "loss": 1.409435749053955, + "loss_ce": 0.00709204887971282, + "loss_iou": 0.6015625, + "loss_num": 0.040283203125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 189277824, + "step": 2858 + }, + { + "epoch": 0.26760892965788363, + "grad_norm": 25.327096939086914, + "learning_rate": 5e-05, + "loss": 1.6612, + "num_input_tokens_seen": 189345460, + "step": 2859 + }, + { + "epoch": 0.26760892965788363, + "loss": 1.7268409729003906, + "loss_ce": 0.011997177265584469, + "loss_iou": 0.6875, + "loss_num": 0.0673828125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 189345460, + "step": 2859 + }, + { + "epoch": 0.2677025319417794, + "grad_norm": 35.089046478271484, + "learning_rate": 5e-05, + "loss": 1.2811, + "num_input_tokens_seen": 189411616, + "step": 2860 + }, + { + "epoch": 0.2677025319417794, + "loss": 1.1830344200134277, + "loss_ce": 0.006429228000342846, + "loss_iou": 0.490234375, + "loss_num": 0.039306640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 189411616, + "step": 2860 + }, + { + "epoch": 0.2677961342256751, + "grad_norm": 22.38773536682129, + "learning_rate": 5e-05, + "loss": 1.3018, + "num_input_tokens_seen": 189478252, + "step": 2861 + }, + { + "epoch": 0.2677961342256751, + "loss": 1.30426824092865, + "loss_ce": 0.005928391590714455, + "loss_iou": 0.54296875, + "loss_num": 0.04150390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 189478252, + "step": 2861 + }, + { + "epoch": 0.2678897365095708, + "grad_norm": 13.499034881591797, + "learning_rate": 5e-05, + "loss": 1.381, + "num_input_tokens_seen": 189544556, + "step": 2862 + }, + { + "epoch": 0.2678897365095708, + "loss": 1.378824234008789, + "loss_ce": 0.004068333189934492, + "loss_iou": 0.53515625, + "loss_num": 0.0615234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 189544556, + "step": 2862 + }, + { + "epoch": 0.2679833387934666, + "grad_norm": 28.108396530151367, + "learning_rate": 5e-05, + "loss": 1.1849, + "num_input_tokens_seen": 189610940, + "step": 2863 + }, + { + "epoch": 0.2679833387934666, + "loss": 1.0894317626953125, + "loss_ce": 0.0059356591664254665, + "loss_iou": 0.453125, + "loss_num": 0.035400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 189610940, + "step": 2863 + }, + { + "epoch": 0.2680769410773623, + "grad_norm": 30.4018611907959, + "learning_rate": 5e-05, + "loss": 1.3455, + "num_input_tokens_seen": 189677316, + "step": 2864 + }, + { + "epoch": 0.2680769410773623, + "loss": 1.3459986448287964, + "loss_ce": 0.003713519312441349, + "loss_iou": 0.58203125, + "loss_num": 0.035888671875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 189677316, + "step": 2864 + }, + { + "epoch": 0.268170543361258, + "grad_norm": 21.577816009521484, + "learning_rate": 5e-05, + "loss": 1.735, + "num_input_tokens_seen": 189743008, + "step": 2865 + }, + { + "epoch": 0.268170543361258, + "loss": 1.7270687818527222, + "loss_ce": 0.00978353712707758, + "loss_iou": 0.71875, + "loss_num": 0.056640625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 189743008, + "step": 2865 + }, + { + "epoch": 0.2682641456451537, + "grad_norm": 15.325296401977539, + "learning_rate": 5e-05, + "loss": 1.1921, + "num_input_tokens_seen": 189809328, + "step": 2866 + }, + { + "epoch": 0.2682641456451537, + "loss": 1.2022290229797363, + "loss_ce": 0.005939934402704239, + "loss_iou": 0.48046875, + "loss_num": 0.046630859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 189809328, + "step": 2866 + }, + { + "epoch": 0.2683577479290495, + "grad_norm": 26.48148536682129, + "learning_rate": 5e-05, + "loss": 1.4575, + "num_input_tokens_seen": 189876472, + "step": 2867 + }, + { + "epoch": 0.2683577479290495, + "loss": 1.2837471961975098, + "loss_ce": 0.002985516097396612, + "loss_iou": 0.53515625, + "loss_num": 0.0419921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 189876472, + "step": 2867 + }, + { + "epoch": 0.2684513502129452, + "grad_norm": 23.65822410583496, + "learning_rate": 5e-05, + "loss": 1.5448, + "num_input_tokens_seen": 189942504, + "step": 2868 + }, + { + "epoch": 0.2684513502129452, + "loss": 1.5133476257324219, + "loss_ce": 0.005535096861422062, + "loss_iou": 0.6328125, + "loss_num": 0.048095703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 189942504, + "step": 2868 + }, + { + "epoch": 0.2685449524968409, + "grad_norm": 35.416561126708984, + "learning_rate": 5e-05, + "loss": 1.5336, + "num_input_tokens_seen": 190008468, + "step": 2869 + }, + { + "epoch": 0.2685449524968409, + "loss": 1.5356783866882324, + "loss_ce": 0.0034517257008701563, + "loss_iou": 0.64453125, + "loss_num": 0.048095703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 190008468, + "step": 2869 + }, + { + "epoch": 0.26863855478073667, + "grad_norm": 25.929424285888672, + "learning_rate": 5e-05, + "loss": 1.6353, + "num_input_tokens_seen": 190074796, + "step": 2870 + }, + { + "epoch": 0.26863855478073667, + "loss": 1.4969170093536377, + "loss_ce": 0.006682596169412136, + "loss_iou": 0.625, + "loss_num": 0.04736328125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 190074796, + "step": 2870 + }, + { + "epoch": 0.2687321570646324, + "grad_norm": 16.10113525390625, + "learning_rate": 5e-05, + "loss": 1.2961, + "num_input_tokens_seen": 190141280, + "step": 2871 + }, + { + "epoch": 0.2687321570646324, + "loss": 1.2921521663665771, + "loss_ce": 0.006507670972496271, + "loss_iou": 0.53515625, + "loss_num": 0.04296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 190141280, + "step": 2871 + }, + { + "epoch": 0.2688257593485281, + "grad_norm": 19.313873291015625, + "learning_rate": 5e-05, + "loss": 1.3619, + "num_input_tokens_seen": 190207592, + "step": 2872 + }, + { + "epoch": 0.2688257593485281, + "loss": 1.3472394943237305, + "loss_ce": 0.007395853754132986, + "loss_iou": 0.59375, + "loss_num": 0.0302734375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 190207592, + "step": 2872 + }, + { + "epoch": 0.2689193616324238, + "grad_norm": 28.079877853393555, + "learning_rate": 5e-05, + "loss": 1.1623, + "num_input_tokens_seen": 190272680, + "step": 2873 + }, + { + "epoch": 0.2689193616324238, + "loss": 1.1926419734954834, + "loss_ce": 0.006240631453692913, + "loss_iou": 0.4765625, + "loss_num": 0.046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 190272680, + "step": 2873 + }, + { + "epoch": 0.26901296391631957, + "grad_norm": 44.42527389526367, + "learning_rate": 5e-05, + "loss": 1.3441, + "num_input_tokens_seen": 190338988, + "step": 2874 + }, + { + "epoch": 0.26901296391631957, + "loss": 1.4027814865112305, + "loss_ce": 0.004832353442907333, + "loss_iou": 0.6171875, + "loss_num": 0.03369140625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 190338988, + "step": 2874 + }, + { + "epoch": 0.2691065662002153, + "grad_norm": 32.68692398071289, + "learning_rate": 5e-05, + "loss": 1.2964, + "num_input_tokens_seen": 190405276, + "step": 2875 + }, + { + "epoch": 0.2691065662002153, + "loss": 1.1693854331970215, + "loss_ce": 0.0037360701244324446, + "loss_iou": 0.498046875, + "loss_num": 0.033935546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 190405276, + "step": 2875 + }, + { + "epoch": 0.269200168484111, + "grad_norm": 24.40705680847168, + "learning_rate": 5e-05, + "loss": 1.3722, + "num_input_tokens_seen": 190469664, + "step": 2876 + }, + { + "epoch": 0.269200168484111, + "loss": 1.316448450088501, + "loss_ce": 0.0034601371735334396, + "loss_iou": 0.55859375, + "loss_num": 0.0390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 190469664, + "step": 2876 + }, + { + "epoch": 0.26929377076800676, + "grad_norm": 34.18718338012695, + "learning_rate": 5e-05, + "loss": 1.3552, + "num_input_tokens_seen": 190536500, + "step": 2877 + }, + { + "epoch": 0.26929377076800676, + "loss": 1.2667336463928223, + "loss_ce": 0.01185077615082264, + "loss_iou": 0.5234375, + "loss_num": 0.041015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 190536500, + "step": 2877 + }, + { + "epoch": 0.26938737305190247, + "grad_norm": 35.88064193725586, + "learning_rate": 5e-05, + "loss": 1.2269, + "num_input_tokens_seen": 190603388, + "step": 2878 + }, + { + "epoch": 0.26938737305190247, + "loss": 1.1076096296310425, + "loss_ce": 0.007511980831623077, + "loss_iou": 0.494140625, + "loss_num": 0.0220947265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 190603388, + "step": 2878 + }, + { + "epoch": 0.2694809753357982, + "grad_norm": 19.359575271606445, + "learning_rate": 5e-05, + "loss": 1.5661, + "num_input_tokens_seen": 190668976, + "step": 2879 + }, + { + "epoch": 0.2694809753357982, + "loss": 1.6734386682510376, + "loss_ce": 0.00351678766310215, + "loss_iou": 0.72265625, + "loss_num": 0.04541015625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 190668976, + "step": 2879 + }, + { + "epoch": 0.26957457761969394, + "grad_norm": 28.439434051513672, + "learning_rate": 5e-05, + "loss": 1.3632, + "num_input_tokens_seen": 190734648, + "step": 2880 + }, + { + "epoch": 0.26957457761969394, + "loss": 1.3256981372833252, + "loss_ce": 0.004409067332744598, + "loss_iou": 0.55859375, + "loss_num": 0.041259765625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 190734648, + "step": 2880 + }, + { + "epoch": 0.26966817990358966, + "grad_norm": 37.80298614501953, + "learning_rate": 5e-05, + "loss": 1.6045, + "num_input_tokens_seen": 190802596, + "step": 2881 + }, + { + "epoch": 0.26966817990358966, + "loss": 1.8723331689834595, + "loss_ce": 0.011493292637169361, + "loss_iou": 0.73046875, + "loss_num": 0.080078125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 190802596, + "step": 2881 + }, + { + "epoch": 0.26976178218748537, + "grad_norm": 18.29846954345703, + "learning_rate": 5e-05, + "loss": 1.6557, + "num_input_tokens_seen": 190867928, + "step": 2882 + }, + { + "epoch": 0.26976178218748537, + "loss": 1.7859479188919067, + "loss_ce": 0.005552499555051327, + "loss_iou": 0.671875, + "loss_num": 0.087890625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 190867928, + "step": 2882 + }, + { + "epoch": 0.2698553844713811, + "grad_norm": 32.939884185791016, + "learning_rate": 5e-05, + "loss": 1.3791, + "num_input_tokens_seen": 190933964, + "step": 2883 + }, + { + "epoch": 0.2698553844713811, + "loss": 1.2570348978042603, + "loss_ce": 0.007523206993937492, + "loss_iou": 0.5234375, + "loss_num": 0.040283203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 190933964, + "step": 2883 + }, + { + "epoch": 0.26994898675527684, + "grad_norm": 25.31010627746582, + "learning_rate": 5e-05, + "loss": 1.2881, + "num_input_tokens_seen": 191000160, + "step": 2884 + }, + { + "epoch": 0.26994898675527684, + "loss": 1.1675686836242676, + "loss_ce": 0.0059475041925907135, + "loss_iou": 0.5234375, + "loss_num": 0.0234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 191000160, + "step": 2884 + }, + { + "epoch": 0.27004258903917255, + "grad_norm": 24.51418113708496, + "learning_rate": 5e-05, + "loss": 1.5254, + "num_input_tokens_seen": 191067692, + "step": 2885 + }, + { + "epoch": 0.27004258903917255, + "loss": 1.6295093297958374, + "loss_ce": 0.0035327691584825516, + "loss_iou": 0.6796875, + "loss_num": 0.053466796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 191067692, + "step": 2885 + }, + { + "epoch": 0.27013619132306826, + "grad_norm": 54.28732681274414, + "learning_rate": 5e-05, + "loss": 1.5643, + "num_input_tokens_seen": 191133808, + "step": 2886 + }, + { + "epoch": 0.27013619132306826, + "loss": 1.6064716577529907, + "loss_ce": 0.006618119310587645, + "loss_iou": 0.6796875, + "loss_num": 0.0478515625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 191133808, + "step": 2886 + }, + { + "epoch": 0.27022979360696403, + "grad_norm": 36.00764465332031, + "learning_rate": 5e-05, + "loss": 1.6871, + "num_input_tokens_seen": 191200632, + "step": 2887 + }, + { + "epoch": 0.27022979360696403, + "loss": 1.5753734111785889, + "loss_ce": 0.005549072287976742, + "loss_iou": 0.65625, + "loss_num": 0.052001953125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 191200632, + "step": 2887 + }, + { + "epoch": 0.27032339589085974, + "grad_norm": 17.422168731689453, + "learning_rate": 5e-05, + "loss": 1.3207, + "num_input_tokens_seen": 191266844, + "step": 2888 + }, + { + "epoch": 0.27032339589085974, + "loss": 1.2167034149169922, + "loss_ce": 0.005277687218040228, + "loss_iou": 0.53515625, + "loss_num": 0.0279541015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 191266844, + "step": 2888 + }, + { + "epoch": 0.27041699817475545, + "grad_norm": 17.126590728759766, + "learning_rate": 5e-05, + "loss": 1.2939, + "num_input_tokens_seen": 191333324, + "step": 2889 + }, + { + "epoch": 0.27041699817475545, + "loss": 1.2734203338623047, + "loss_ce": 0.002912552561610937, + "loss_iou": 0.5546875, + "loss_num": 0.03271484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 191333324, + "step": 2889 + }, + { + "epoch": 0.2705106004586512, + "grad_norm": 18.5518798828125, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 191400596, + "step": 2890 + }, + { + "epoch": 0.2705106004586512, + "loss": 1.410339117050171, + "loss_ce": 0.004577350337058306, + "loss_iou": 0.58203125, + "loss_num": 0.048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 191400596, + "step": 2890 + }, + { + "epoch": 0.27060420274254693, + "grad_norm": 21.558935165405273, + "learning_rate": 5e-05, + "loss": 1.539, + "num_input_tokens_seen": 191466084, + "step": 2891 + }, + { + "epoch": 0.27060420274254693, + "loss": 1.5051676034927368, + "loss_ce": 0.006144177168607712, + "loss_iou": 0.66015625, + "loss_num": 0.03564453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 191466084, + "step": 2891 + }, + { + "epoch": 0.27069780502644264, + "grad_norm": 33.934967041015625, + "learning_rate": 5e-05, + "loss": 1.3565, + "num_input_tokens_seen": 191533188, + "step": 2892 + }, + { + "epoch": 0.27069780502644264, + "loss": 1.3920419216156006, + "loss_ce": 0.004346621688455343, + "loss_iou": 0.61328125, + "loss_num": 0.03173828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 191533188, + "step": 2892 + }, + { + "epoch": 0.27079140731033835, + "grad_norm": 26.771821975708008, + "learning_rate": 5e-05, + "loss": 1.4158, + "num_input_tokens_seen": 191599268, + "step": 2893 + }, + { + "epoch": 0.27079140731033835, + "loss": 1.4195261001586914, + "loss_ce": 0.0064400564879179, + "loss_iou": 0.62890625, + "loss_num": 0.0301513671875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 191599268, + "step": 2893 + }, + { + "epoch": 0.2708850095942341, + "grad_norm": 34.096168518066406, + "learning_rate": 5e-05, + "loss": 1.4672, + "num_input_tokens_seen": 191666396, + "step": 2894 + }, + { + "epoch": 0.2708850095942341, + "loss": 1.256096363067627, + "loss_ce": 0.0021900050342082977, + "loss_iou": 0.546875, + "loss_num": 0.031494140625, + "loss_xval": 1.25, + "num_input_tokens_seen": 191666396, + "step": 2894 + }, + { + "epoch": 0.2709786118781298, + "grad_norm": 22.507471084594727, + "learning_rate": 5e-05, + "loss": 1.5978, + "num_input_tokens_seen": 191732840, + "step": 2895 + }, + { + "epoch": 0.2709786118781298, + "loss": 1.6347594261169434, + "loss_ce": 0.008294529281556606, + "loss_iou": 0.6484375, + "loss_num": 0.06591796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 191732840, + "step": 2895 + }, + { + "epoch": 0.27107221416202554, + "grad_norm": 18.68686866760254, + "learning_rate": 5e-05, + "loss": 1.2171, + "num_input_tokens_seen": 191798476, + "step": 2896 + }, + { + "epoch": 0.27107221416202554, + "loss": 1.451460838317871, + "loss_ce": 0.0056601292453706264, + "loss_iou": 0.578125, + "loss_num": 0.057861328125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 191798476, + "step": 2896 + }, + { + "epoch": 0.2711658164459213, + "grad_norm": 16.7393856048584, + "learning_rate": 5e-05, + "loss": 1.3787, + "num_input_tokens_seen": 191865140, + "step": 2897 + }, + { + "epoch": 0.2711658164459213, + "loss": 1.4760732650756836, + "loss_ce": 0.0029288004152476788, + "loss_iou": 0.58984375, + "loss_num": 0.0595703125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 191865140, + "step": 2897 + }, + { + "epoch": 0.271259418729817, + "grad_norm": 34.328731536865234, + "learning_rate": 5e-05, + "loss": 1.1515, + "num_input_tokens_seen": 191930724, + "step": 2898 + }, + { + "epoch": 0.271259418729817, + "loss": 1.3820092678070068, + "loss_ce": 0.005056167021393776, + "loss_iou": 0.546875, + "loss_num": 0.056396484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 191930724, + "step": 2898 + }, + { + "epoch": 0.2713530210137127, + "grad_norm": 27.414579391479492, + "learning_rate": 5e-05, + "loss": 1.3494, + "num_input_tokens_seen": 191997548, + "step": 2899 + }, + { + "epoch": 0.2713530210137127, + "loss": 1.383734941482544, + "loss_ce": 0.006781915668398142, + "loss_iou": 0.5859375, + "loss_num": 0.041015625, + "loss_xval": 1.375, + "num_input_tokens_seen": 191997548, + "step": 2899 + }, + { + "epoch": 0.27144662329760844, + "grad_norm": 32.842403411865234, + "learning_rate": 5e-05, + "loss": 1.5143, + "num_input_tokens_seen": 192063176, + "step": 2900 + }, + { + "epoch": 0.27144662329760844, + "loss": 1.651855230331421, + "loss_ce": 0.0039059417322278023, + "loss_iou": 0.6796875, + "loss_num": 0.056884765625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 192063176, + "step": 2900 + }, + { + "epoch": 0.2715402255815042, + "grad_norm": 31.889387130737305, + "learning_rate": 5e-05, + "loss": 1.4789, + "num_input_tokens_seen": 192130028, + "step": 2901 + }, + { + "epoch": 0.2715402255815042, + "loss": 1.3662561178207397, + "loss_ce": 0.0039514051750302315, + "loss_iou": 0.58203125, + "loss_num": 0.039306640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 192130028, + "step": 2901 + }, + { + "epoch": 0.2716338278653999, + "grad_norm": 18.446582794189453, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 192197000, + "step": 2902 + }, + { + "epoch": 0.2716338278653999, + "loss": 1.2791718244552612, + "loss_ce": 0.00866398960351944, + "loss_iou": 0.546875, + "loss_num": 0.035400390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 192197000, + "step": 2902 + }, + { + "epoch": 0.2717274301492956, + "grad_norm": 29.704530715942383, + "learning_rate": 5e-05, + "loss": 1.3292, + "num_input_tokens_seen": 192263336, + "step": 2903 + }, + { + "epoch": 0.2717274301492956, + "loss": 1.4902559518814087, + "loss_ce": 0.00832241028547287, + "loss_iou": 0.6484375, + "loss_num": 0.037353515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 192263336, + "step": 2903 + }, + { + "epoch": 0.2718210324331914, + "grad_norm": 51.86347198486328, + "learning_rate": 5e-05, + "loss": 1.365, + "num_input_tokens_seen": 192329180, + "step": 2904 + }, + { + "epoch": 0.2718210324331914, + "loss": 1.4872848987579346, + "loss_ce": 0.004863027948886156, + "loss_iou": 0.6328125, + "loss_num": 0.04296875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 192329180, + "step": 2904 + }, + { + "epoch": 0.2719146347170871, + "grad_norm": 38.360008239746094, + "learning_rate": 5e-05, + "loss": 1.6924, + "num_input_tokens_seen": 192396212, + "step": 2905 + }, + { + "epoch": 0.2719146347170871, + "loss": 1.7409523725509644, + "loss_ce": 0.00657739769667387, + "loss_iou": 0.72265625, + "loss_num": 0.0576171875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 192396212, + "step": 2905 + }, + { + "epoch": 0.2720082370009828, + "grad_norm": 56.90835952758789, + "learning_rate": 5e-05, + "loss": 1.2948, + "num_input_tokens_seen": 192462316, + "step": 2906 + }, + { + "epoch": 0.2720082370009828, + "loss": 1.3784031867980957, + "loss_ce": 0.0034032172989100218, + "loss_iou": 0.55859375, + "loss_num": 0.05224609375, + "loss_xval": 1.375, + "num_input_tokens_seen": 192462316, + "step": 2906 + }, + { + "epoch": 0.2721018392848786, + "grad_norm": 26.823774337768555, + "learning_rate": 5e-05, + "loss": 1.2908, + "num_input_tokens_seen": 192528460, + "step": 2907 + }, + { + "epoch": 0.2721018392848786, + "loss": 1.345680594444275, + "loss_ce": 0.006813411600887775, + "loss_iou": 0.59375, + "loss_num": 0.0299072265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 192528460, + "step": 2907 + }, + { + "epoch": 0.2721954415687743, + "grad_norm": 28.281461715698242, + "learning_rate": 5e-05, + "loss": 1.5285, + "num_input_tokens_seen": 192594860, + "step": 2908 + }, + { + "epoch": 0.2721954415687743, + "loss": 1.528496265411377, + "loss_ce": 0.005058795213699341, + "loss_iou": 0.64453125, + "loss_num": 0.047119140625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 192594860, + "step": 2908 + }, + { + "epoch": 0.27228904385267, + "grad_norm": 28.82747459411621, + "learning_rate": 5e-05, + "loss": 1.3875, + "num_input_tokens_seen": 192660764, + "step": 2909 + }, + { + "epoch": 0.27228904385267, + "loss": 1.5593502521514893, + "loss_ce": 0.004662739112973213, + "loss_iou": 0.67578125, + "loss_num": 0.04052734375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 192660764, + "step": 2909 + }, + { + "epoch": 0.2723826461365657, + "grad_norm": 20.43279266357422, + "learning_rate": 5e-05, + "loss": 1.3634, + "num_input_tokens_seen": 192727436, + "step": 2910 + }, + { + "epoch": 0.2723826461365657, + "loss": 1.2430782318115234, + "loss_ce": 0.005773566663265228, + "loss_iou": 0.515625, + "loss_num": 0.040283203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 192727436, + "step": 2910 + }, + { + "epoch": 0.2724762484204615, + "grad_norm": 32.585853576660156, + "learning_rate": 5e-05, + "loss": 1.3047, + "num_input_tokens_seen": 192793132, + "step": 2911 + }, + { + "epoch": 0.2724762484204615, + "loss": 1.2785714864730835, + "loss_ce": 0.003180821891874075, + "loss_iou": 0.5390625, + "loss_num": 0.03857421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 192793132, + "step": 2911 + }, + { + "epoch": 0.2725698507043572, + "grad_norm": 26.237361907958984, + "learning_rate": 5e-05, + "loss": 1.5109, + "num_input_tokens_seen": 192859248, + "step": 2912 + }, + { + "epoch": 0.2725698507043572, + "loss": 1.487302303314209, + "loss_ce": 0.005368643440306187, + "loss_iou": 0.578125, + "loss_num": 0.0654296875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 192859248, + "step": 2912 + }, + { + "epoch": 0.2726634529882529, + "grad_norm": 21.532819747924805, + "learning_rate": 5e-05, + "loss": 1.5673, + "num_input_tokens_seen": 192924620, + "step": 2913 + }, + { + "epoch": 0.2726634529882529, + "loss": 1.6626427173614502, + "loss_ce": 0.0024864422157406807, + "loss_iou": 0.66796875, + "loss_num": 0.0654296875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 192924620, + "step": 2913 + }, + { + "epoch": 0.27275705527214866, + "grad_norm": 169.73837280273438, + "learning_rate": 5e-05, + "loss": 1.3346, + "num_input_tokens_seen": 192990120, + "step": 2914 + }, + { + "epoch": 0.27275705527214866, + "loss": 1.394890308380127, + "loss_ce": 0.002800529822707176, + "loss_iou": 0.578125, + "loss_num": 0.04736328125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 192990120, + "step": 2914 + }, + { + "epoch": 0.2728506575560444, + "grad_norm": 20.661970138549805, + "learning_rate": 5e-05, + "loss": 1.4836, + "num_input_tokens_seen": 193056160, + "step": 2915 + }, + { + "epoch": 0.2728506575560444, + "loss": 1.3897316455841064, + "loss_ce": 0.0035012071020901203, + "loss_iou": 0.58203125, + "loss_num": 0.045166015625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 193056160, + "step": 2915 + }, + { + "epoch": 0.2729442598399401, + "grad_norm": 36.34321594238281, + "learning_rate": 5e-05, + "loss": 1.2865, + "num_input_tokens_seen": 193122320, + "step": 2916 + }, + { + "epoch": 0.2729442598399401, + "loss": 1.4770772457122803, + "loss_ce": 0.004421040415763855, + "loss_iou": 0.609375, + "loss_num": 0.051025390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 193122320, + "step": 2916 + }, + { + "epoch": 0.2730378621238358, + "grad_norm": 23.29226303100586, + "learning_rate": 5e-05, + "loss": 1.7117, + "num_input_tokens_seen": 193188984, + "step": 2917 + }, + { + "epoch": 0.2730378621238358, + "loss": 1.7196775674819946, + "loss_ce": 0.004833762533962727, + "loss_iou": 0.74609375, + "loss_num": 0.045166015625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 193188984, + "step": 2917 + }, + { + "epoch": 0.27313146440773156, + "grad_norm": 17.75802230834961, + "learning_rate": 5e-05, + "loss": 1.3121, + "num_input_tokens_seen": 193255256, + "step": 2918 + }, + { + "epoch": 0.27313146440773156, + "loss": 1.1056413650512695, + "loss_ce": 0.0035905414260923862, + "loss_iou": 0.4375, + "loss_num": 0.0458984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 193255256, + "step": 2918 + }, + { + "epoch": 0.27322506669162727, + "grad_norm": 15.197121620178223, + "learning_rate": 5e-05, + "loss": 1.2931, + "num_input_tokens_seen": 193321308, + "step": 2919 + }, + { + "epoch": 0.27322506669162727, + "loss": 1.4740924835205078, + "loss_ce": 0.004854124039411545, + "loss_iou": 0.5859375, + "loss_num": 0.059814453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 193321308, + "step": 2919 + }, + { + "epoch": 0.273318668975523, + "grad_norm": 14.801645278930664, + "learning_rate": 5e-05, + "loss": 1.266, + "num_input_tokens_seen": 193386928, + "step": 2920 + }, + { + "epoch": 0.273318668975523, + "loss": 1.546339511871338, + "loss_ce": 0.00630049267783761, + "loss_iou": 0.625, + "loss_num": 0.05859375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 193386928, + "step": 2920 + }, + { + "epoch": 0.27341227125941875, + "grad_norm": 35.99678039550781, + "learning_rate": 5e-05, + "loss": 1.5811, + "num_input_tokens_seen": 193454828, + "step": 2921 + }, + { + "epoch": 0.27341227125941875, + "loss": 1.6381123065948486, + "loss_ce": 0.00627634534612298, + "loss_iou": 0.62890625, + "loss_num": 0.07470703125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 193454828, + "step": 2921 + }, + { + "epoch": 0.27350587354331446, + "grad_norm": 24.44113540649414, + "learning_rate": 5e-05, + "loss": 1.4893, + "num_input_tokens_seen": 193520452, + "step": 2922 + }, + { + "epoch": 0.27350587354331446, + "loss": 1.6143724918365479, + "loss_ce": 0.003044459968805313, + "loss_iou": 0.64453125, + "loss_num": 0.06494140625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 193520452, + "step": 2922 + }, + { + "epoch": 0.27359947582721017, + "grad_norm": 20.205230712890625, + "learning_rate": 5e-05, + "loss": 1.3415, + "num_input_tokens_seen": 193586104, + "step": 2923 + }, + { + "epoch": 0.27359947582721017, + "loss": 1.3367654085159302, + "loss_ce": 0.014988003298640251, + "loss_iou": 0.50390625, + "loss_num": 0.0634765625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 193586104, + "step": 2923 + }, + { + "epoch": 0.27369307811110594, + "grad_norm": 22.40543556213379, + "learning_rate": 5e-05, + "loss": 1.63, + "num_input_tokens_seen": 193654220, + "step": 2924 + }, + { + "epoch": 0.27369307811110594, + "loss": 1.5842056274414062, + "loss_ce": 0.008033794350922108, + "loss_iou": 0.65625, + "loss_num": 0.052734375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 193654220, + "step": 2924 + }, + { + "epoch": 0.27378668039500165, + "grad_norm": 31.34164047241211, + "learning_rate": 5e-05, + "loss": 1.3402, + "num_input_tokens_seen": 193720852, + "step": 2925 + }, + { + "epoch": 0.27378668039500165, + "loss": 1.2183868885040283, + "loss_ce": 0.0030548027716577053, + "loss_iou": 0.50390625, + "loss_num": 0.042236328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 193720852, + "step": 2925 + }, + { + "epoch": 0.27388028267889736, + "grad_norm": 21.536550521850586, + "learning_rate": 5e-05, + "loss": 1.5679, + "num_input_tokens_seen": 193787988, + "step": 2926 + }, + { + "epoch": 0.27388028267889736, + "loss": 1.6892585754394531, + "loss_ce": 0.009571101516485214, + "loss_iou": 0.66796875, + "loss_num": 0.068359375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 193787988, + "step": 2926 + }, + { + "epoch": 0.27397388496279307, + "grad_norm": 26.01751136779785, + "learning_rate": 5e-05, + "loss": 1.4598, + "num_input_tokens_seen": 193853776, + "step": 2927 + }, + { + "epoch": 0.27397388496279307, + "loss": 1.3240104913711548, + "loss_ce": 0.00955736543983221, + "loss_iou": 0.5078125, + "loss_num": 0.059326171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 193853776, + "step": 2927 + }, + { + "epoch": 0.27406748724668883, + "grad_norm": 19.382383346557617, + "learning_rate": 5e-05, + "loss": 1.4183, + "num_input_tokens_seen": 193920368, + "step": 2928 + }, + { + "epoch": 0.27406748724668883, + "loss": 1.4621467590332031, + "loss_ce": 0.013904567807912827, + "loss_iou": 0.59375, + "loss_num": 0.052734375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 193920368, + "step": 2928 + }, + { + "epoch": 0.27416108953058455, + "grad_norm": 28.4448299407959, + "learning_rate": 5e-05, + "loss": 1.2862, + "num_input_tokens_seen": 193985652, + "step": 2929 + }, + { + "epoch": 0.27416108953058455, + "loss": 1.1947191953659058, + "loss_ce": 0.008592411875724792, + "loss_iou": 0.515625, + "loss_num": 0.0303955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 193985652, + "step": 2929 + }, + { + "epoch": 0.27425469181448026, + "grad_norm": 157.48585510253906, + "learning_rate": 5e-05, + "loss": 1.303, + "num_input_tokens_seen": 194050880, + "step": 2930 + }, + { + "epoch": 0.27425469181448026, + "loss": 1.106088638305664, + "loss_ce": 0.0033055199310183525, + "loss_iou": 0.412109375, + "loss_num": 0.055419921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 194050880, + "step": 2930 + }, + { + "epoch": 0.274348294098376, + "grad_norm": 27.36667823791504, + "learning_rate": 5e-05, + "loss": 1.2582, + "num_input_tokens_seen": 194118500, + "step": 2931 + }, + { + "epoch": 0.274348294098376, + "loss": 1.366217851638794, + "loss_ce": 0.00977259874343872, + "loss_iou": 0.53125, + "loss_num": 0.05908203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 194118500, + "step": 2931 + }, + { + "epoch": 0.27444189638227173, + "grad_norm": 45.056976318359375, + "learning_rate": 5e-05, + "loss": 1.1968, + "num_input_tokens_seen": 194184068, + "step": 2932 + }, + { + "epoch": 0.27444189638227173, + "loss": 1.1848256587982178, + "loss_ce": 0.003184966742992401, + "loss_iou": 0.51953125, + "loss_num": 0.0289306640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 194184068, + "step": 2932 + }, + { + "epoch": 0.27453549866616744, + "grad_norm": 37.12651062011719, + "learning_rate": 5e-05, + "loss": 1.2082, + "num_input_tokens_seen": 194251356, + "step": 2933 + }, + { + "epoch": 0.27453549866616744, + "loss": 1.125702142715454, + "loss_ce": 0.0055850474163889885, + "loss_iou": 0.48046875, + "loss_num": 0.03173828125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 194251356, + "step": 2933 + }, + { + "epoch": 0.27462910095006315, + "grad_norm": 32.226539611816406, + "learning_rate": 5e-05, + "loss": 1.6397, + "num_input_tokens_seen": 194317776, + "step": 2934 + }, + { + "epoch": 0.27462910095006315, + "loss": 1.6473486423492432, + "loss_ce": 0.003793943440541625, + "loss_iou": 0.6953125, + "loss_num": 0.05029296875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 194317776, + "step": 2934 + }, + { + "epoch": 0.2747227032339589, + "grad_norm": 36.53327178955078, + "learning_rate": 5e-05, + "loss": 1.2686, + "num_input_tokens_seen": 194384300, + "step": 2935 + }, + { + "epoch": 0.2747227032339589, + "loss": 1.4336204528808594, + "loss_ce": 0.002956465817987919, + "loss_iou": 0.59375, + "loss_num": 0.048095703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 194384300, + "step": 2935 + }, + { + "epoch": 0.27481630551785463, + "grad_norm": 38.007835388183594, + "learning_rate": 5e-05, + "loss": 1.6442, + "num_input_tokens_seen": 194450960, + "step": 2936 + }, + { + "epoch": 0.27481630551785463, + "loss": 1.7354243993759155, + "loss_ce": 0.0020259853918105364, + "loss_iou": 0.72265625, + "loss_num": 0.057373046875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 194450960, + "step": 2936 + }, + { + "epoch": 0.27490990780175034, + "grad_norm": 23.463176727294922, + "learning_rate": 5e-05, + "loss": 1.6341, + "num_input_tokens_seen": 194517312, + "step": 2937 + }, + { + "epoch": 0.27490990780175034, + "loss": 1.3556169271469116, + "loss_ce": 0.006495887879282236, + "loss_iou": 0.578125, + "loss_num": 0.03955078125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 194517312, + "step": 2937 + }, + { + "epoch": 0.2750035100856461, + "grad_norm": 54.84428024291992, + "learning_rate": 5e-05, + "loss": 1.2504, + "num_input_tokens_seen": 194583340, + "step": 2938 + }, + { + "epoch": 0.2750035100856461, + "loss": 1.0731542110443115, + "loss_ce": 0.0057714711874723434, + "loss_iou": 0.46875, + "loss_num": 0.02587890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 194583340, + "step": 2938 + }, + { + "epoch": 0.2750971123695418, + "grad_norm": 17.140056610107422, + "learning_rate": 5e-05, + "loss": 1.3787, + "num_input_tokens_seen": 194649472, + "step": 2939 + }, + { + "epoch": 0.2750971123695418, + "loss": 1.184022068977356, + "loss_ce": 0.003113870043307543, + "loss_iou": 0.49609375, + "loss_num": 0.03759765625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 194649472, + "step": 2939 + }, + { + "epoch": 0.27519071465343753, + "grad_norm": 34.75260925292969, + "learning_rate": 5e-05, + "loss": 1.6385, + "num_input_tokens_seen": 194714968, + "step": 2940 + }, + { + "epoch": 0.27519071465343753, + "loss": 1.5107307434082031, + "loss_ce": 0.003406504401937127, + "loss_iou": 0.61328125, + "loss_num": 0.056884765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 194714968, + "step": 2940 + }, + { + "epoch": 0.2752843169373333, + "grad_norm": 20.1380558013916, + "learning_rate": 5e-05, + "loss": 1.5287, + "num_input_tokens_seen": 194781976, + "step": 2941 + }, + { + "epoch": 0.2752843169373333, + "loss": 1.6637755632400513, + "loss_ce": 0.005572447087615728, + "loss_iou": 0.70703125, + "loss_num": 0.04833984375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 194781976, + "step": 2941 + }, + { + "epoch": 0.275377919221229, + "grad_norm": 19.91967010498047, + "learning_rate": 5e-05, + "loss": 1.4581, + "num_input_tokens_seen": 194847748, + "step": 2942 + }, + { + "epoch": 0.275377919221229, + "loss": 1.4977524280548096, + "loss_ce": 0.0031234631314873695, + "loss_iou": 0.58984375, + "loss_num": 0.0625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 194847748, + "step": 2942 + }, + { + "epoch": 0.2754715215051247, + "grad_norm": 27.349401473999023, + "learning_rate": 5e-05, + "loss": 1.4075, + "num_input_tokens_seen": 194913456, + "step": 2943 + }, + { + "epoch": 0.2754715215051247, + "loss": 1.5793519020080566, + "loss_ce": 0.0065980092622339725, + "loss_iou": 0.63671875, + "loss_num": 0.0595703125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 194913456, + "step": 2943 + }, + { + "epoch": 0.2755651237890204, + "grad_norm": 32.3651123046875, + "learning_rate": 5e-05, + "loss": 1.3806, + "num_input_tokens_seen": 194980020, + "step": 2944 + }, + { + "epoch": 0.2755651237890204, + "loss": 1.4813790321350098, + "loss_ce": 0.0023751629050821066, + "loss_iou": 0.609375, + "loss_num": 0.05224609375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 194980020, + "step": 2944 + }, + { + "epoch": 0.2756587260729162, + "grad_norm": 53.90221405029297, + "learning_rate": 5e-05, + "loss": 1.6783, + "num_input_tokens_seen": 195045816, + "step": 2945 + }, + { + "epoch": 0.2756587260729162, + "loss": 1.4300849437713623, + "loss_ce": 0.0033271731808781624, + "loss_iou": 0.60546875, + "loss_num": 0.04345703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 195045816, + "step": 2945 + }, + { + "epoch": 0.2757523283568119, + "grad_norm": 44.97574996948242, + "learning_rate": 5e-05, + "loss": 1.4494, + "num_input_tokens_seen": 195111356, + "step": 2946 + }, + { + "epoch": 0.2757523283568119, + "loss": 1.2630583047866821, + "loss_ce": 0.004269261844456196, + "loss_iou": 0.55078125, + "loss_num": 0.031494140625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 195111356, + "step": 2946 + }, + { + "epoch": 0.2758459306407076, + "grad_norm": 27.603666305541992, + "learning_rate": 5e-05, + "loss": 1.4105, + "num_input_tokens_seen": 195178164, + "step": 2947 + }, + { + "epoch": 0.2758459306407076, + "loss": 1.4767403602600098, + "loss_ce": 0.008966884575784206, + "loss_iou": 0.62109375, + "loss_num": 0.04541015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 195178164, + "step": 2947 + }, + { + "epoch": 0.2759395329246034, + "grad_norm": 52.44571304321289, + "learning_rate": 5e-05, + "loss": 1.6437, + "num_input_tokens_seen": 195243740, + "step": 2948 + }, + { + "epoch": 0.2759395329246034, + "loss": 1.5359150171279907, + "loss_ce": 0.004665090702474117, + "loss_iou": 0.671875, + "loss_num": 0.03759765625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 195243740, + "step": 2948 + }, + { + "epoch": 0.2760331352084991, + "grad_norm": 15.909627914428711, + "learning_rate": 5e-05, + "loss": 1.1997, + "num_input_tokens_seen": 195310312, + "step": 2949 + }, + { + "epoch": 0.2760331352084991, + "loss": 1.267456293106079, + "loss_ce": 0.006958204787224531, + "loss_iou": 0.54296875, + "loss_num": 0.03515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 195310312, + "step": 2949 + }, + { + "epoch": 0.2761267374923948, + "grad_norm": 22.354604721069336, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 195376212, + "step": 2950 + }, + { + "epoch": 0.2761267374923948, + "loss": 1.3071309328079224, + "loss_ce": 0.007814496755599976, + "loss_iou": 0.5390625, + "loss_num": 0.044677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 195376212, + "step": 2950 + }, + { + "epoch": 0.27622033977629057, + "grad_norm": 37.252540588378906, + "learning_rate": 5e-05, + "loss": 1.4142, + "num_input_tokens_seen": 195441904, + "step": 2951 + }, + { + "epoch": 0.27622033977629057, + "loss": 1.4334460496902466, + "loss_ce": 0.005223315209150314, + "loss_iou": 0.59375, + "loss_num": 0.047607421875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 195441904, + "step": 2951 + }, + { + "epoch": 0.2763139420601863, + "grad_norm": 18.50592613220215, + "learning_rate": 5e-05, + "loss": 1.1673, + "num_input_tokens_seen": 195506856, + "step": 2952 + }, + { + "epoch": 0.2763139420601863, + "loss": 1.4549554586410522, + "loss_ce": 0.003783546620979905, + "loss_iou": 0.6171875, + "loss_num": 0.043701171875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 195506856, + "step": 2952 + }, + { + "epoch": 0.276407544344082, + "grad_norm": 14.848483085632324, + "learning_rate": 5e-05, + "loss": 1.2464, + "num_input_tokens_seen": 195573092, + "step": 2953 + }, + { + "epoch": 0.276407544344082, + "loss": 1.1065553426742554, + "loss_ce": 0.0073121171444654465, + "loss_iou": 0.4609375, + "loss_num": 0.03515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 195573092, + "step": 2953 + }, + { + "epoch": 0.2765011466279777, + "grad_norm": 15.150407791137695, + "learning_rate": 5e-05, + "loss": 1.0623, + "num_input_tokens_seen": 195639176, + "step": 2954 + }, + { + "epoch": 0.2765011466279777, + "loss": 1.060774564743042, + "loss_ce": 0.006087028421461582, + "loss_iou": 0.416015625, + "loss_num": 0.044677734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 195639176, + "step": 2954 + }, + { + "epoch": 0.27659474891187347, + "grad_norm": 17.979511260986328, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 195705584, + "step": 2955 + }, + { + "epoch": 0.27659474891187347, + "loss": 1.4038522243499756, + "loss_ce": 0.005414669401943684, + "loss_iou": 0.61328125, + "loss_num": 0.033935546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 195705584, + "step": 2955 + }, + { + "epoch": 0.2766883511957692, + "grad_norm": 69.79885864257812, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 195772380, + "step": 2956 + }, + { + "epoch": 0.2766883511957692, + "loss": 1.0038360357284546, + "loss_ce": 0.003836047602817416, + "loss_iou": 0.439453125, + "loss_num": 0.024169921875, + "loss_xval": 1.0, + "num_input_tokens_seen": 195772380, + "step": 2956 + }, + { + "epoch": 0.2767819534796649, + "grad_norm": 21.76862144470215, + "learning_rate": 5e-05, + "loss": 1.3137, + "num_input_tokens_seen": 195838060, + "step": 2957 + }, + { + "epoch": 0.2767819534796649, + "loss": 1.3768516778945923, + "loss_ce": 0.006734518799930811, + "loss_iou": 0.52734375, + "loss_num": 0.0625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 195838060, + "step": 2957 + }, + { + "epoch": 0.27687555576356065, + "grad_norm": 37.61552429199219, + "learning_rate": 5e-05, + "loss": 1.4519, + "num_input_tokens_seen": 195903216, + "step": 2958 + }, + { + "epoch": 0.27687555576356065, + "loss": 1.625257968902588, + "loss_ce": 0.007093844935297966, + "loss_iou": 0.6640625, + "loss_num": 0.058349609375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 195903216, + "step": 2958 + }, + { + "epoch": 0.27696915804745637, + "grad_norm": 22.148937225341797, + "learning_rate": 5e-05, + "loss": 1.5944, + "num_input_tokens_seen": 195970892, + "step": 2959 + }, + { + "epoch": 0.27696915804745637, + "loss": 1.5753856897354126, + "loss_ce": 0.008979421108961105, + "loss_iou": 0.59765625, + "loss_num": 0.07373046875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 195970892, + "step": 2959 + }, + { + "epoch": 0.2770627603313521, + "grad_norm": 188.84820556640625, + "learning_rate": 5e-05, + "loss": 1.2096, + "num_input_tokens_seen": 196036800, + "step": 2960 + }, + { + "epoch": 0.2770627603313521, + "loss": 1.1954622268676758, + "loss_ce": 0.006009131204336882, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 196036800, + "step": 2960 + }, + { + "epoch": 0.2771563626152478, + "grad_norm": 40.225379943847656, + "learning_rate": 5e-05, + "loss": 1.4014, + "num_input_tokens_seen": 196102516, + "step": 2961 + }, + { + "epoch": 0.2771563626152478, + "loss": 1.2238408327102661, + "loss_ce": 0.0036260043270885944, + "loss_iou": 0.5234375, + "loss_num": 0.0341796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 196102516, + "step": 2961 + }, + { + "epoch": 0.27724996489914355, + "grad_norm": 30.62041473388672, + "learning_rate": 5e-05, + "loss": 1.5254, + "num_input_tokens_seen": 196169528, + "step": 2962 + }, + { + "epoch": 0.27724996489914355, + "loss": 1.6488304138183594, + "loss_ce": 0.008205480873584747, + "loss_iou": 0.67578125, + "loss_num": 0.05859375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 196169528, + "step": 2962 + }, + { + "epoch": 0.27734356718303926, + "grad_norm": 19.16793441772461, + "learning_rate": 5e-05, + "loss": 1.203, + "num_input_tokens_seen": 196235160, + "step": 2963 + }, + { + "epoch": 0.27734356718303926, + "loss": 1.2300286293029785, + "loss_ce": 0.005968559067696333, + "loss_iou": 0.546875, + "loss_num": 0.0262451171875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 196235160, + "step": 2963 + }, + { + "epoch": 0.277437169466935, + "grad_norm": 13.29127311706543, + "learning_rate": 5e-05, + "loss": 1.3865, + "num_input_tokens_seen": 196300332, + "step": 2964 + }, + { + "epoch": 0.277437169466935, + "loss": 1.5598807334899902, + "loss_ce": 0.0042167287319898605, + "loss_iou": 0.64453125, + "loss_num": 0.0537109375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 196300332, + "step": 2964 + }, + { + "epoch": 0.27753077175083074, + "grad_norm": 21.9099178314209, + "learning_rate": 5e-05, + "loss": 1.1395, + "num_input_tokens_seen": 196366608, + "step": 2965 + }, + { + "epoch": 0.27753077175083074, + "loss": 1.1302073001861572, + "loss_ce": 0.004719014745205641, + "loss_iou": 0.51953125, + "loss_num": 0.0179443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 196366608, + "step": 2965 + }, + { + "epoch": 0.27762437403472645, + "grad_norm": 34.35336685180664, + "learning_rate": 5e-05, + "loss": 1.1855, + "num_input_tokens_seen": 196433176, + "step": 2966 + }, + { + "epoch": 0.27762437403472645, + "loss": 1.1923853158950806, + "loss_ce": 0.0024439168628305197, + "loss_iou": 0.53125, + "loss_num": 0.0257568359375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 196433176, + "step": 2966 + }, + { + "epoch": 0.27771797631862216, + "grad_norm": 32.035465240478516, + "learning_rate": 5e-05, + "loss": 1.5564, + "num_input_tokens_seen": 196499096, + "step": 2967 + }, + { + "epoch": 0.27771797631862216, + "loss": 1.439605474472046, + "loss_ce": 0.00796491652727127, + "loss_iou": 0.5390625, + "loss_num": 0.07080078125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 196499096, + "step": 2967 + }, + { + "epoch": 0.27781157860251793, + "grad_norm": 41.921775817871094, + "learning_rate": 5e-05, + "loss": 1.1717, + "num_input_tokens_seen": 196564628, + "step": 2968 + }, + { + "epoch": 0.27781157860251793, + "loss": 1.1814939975738525, + "loss_ce": 0.004736181348562241, + "loss_iou": 0.515625, + "loss_num": 0.02880859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 196564628, + "step": 2968 + }, + { + "epoch": 0.27790518088641364, + "grad_norm": 20.5039005279541, + "learning_rate": 5e-05, + "loss": 1.0963, + "num_input_tokens_seen": 196631252, + "step": 2969 + }, + { + "epoch": 0.27790518088641364, + "loss": 1.1294777393341064, + "loss_ce": 0.004477834329009056, + "loss_iou": 0.51171875, + "loss_num": 0.019775390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 196631252, + "step": 2969 + }, + { + "epoch": 0.27799878317030935, + "grad_norm": 30.32698631286621, + "learning_rate": 5e-05, + "loss": 1.4396, + "num_input_tokens_seen": 196697688, + "step": 2970 + }, + { + "epoch": 0.27799878317030935, + "loss": 1.4818978309631348, + "loss_ce": 0.004846978932619095, + "loss_iou": 0.60546875, + "loss_num": 0.0537109375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 196697688, + "step": 2970 + }, + { + "epoch": 0.27809238545420506, + "grad_norm": 38.12827682495117, + "learning_rate": 5e-05, + "loss": 1.4142, + "num_input_tokens_seen": 196765388, + "step": 2971 + }, + { + "epoch": 0.27809238545420506, + "loss": 1.5492026805877686, + "loss_ce": 0.003304253565147519, + "loss_iou": 0.671875, + "loss_num": 0.040771484375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 196765388, + "step": 2971 + }, + { + "epoch": 0.2781859877381008, + "grad_norm": 21.766862869262695, + "learning_rate": 5e-05, + "loss": 1.3314, + "num_input_tokens_seen": 196831972, + "step": 2972 + }, + { + "epoch": 0.2781859877381008, + "loss": 1.363213300704956, + "loss_ce": 0.005791435018181801, + "loss_iou": 0.578125, + "loss_num": 0.03955078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 196831972, + "step": 2972 + }, + { + "epoch": 0.27827959002199654, + "grad_norm": 32.41834259033203, + "learning_rate": 5e-05, + "loss": 1.4518, + "num_input_tokens_seen": 196898752, + "step": 2973 + }, + { + "epoch": 0.27827959002199654, + "loss": 1.669748067855835, + "loss_ce": 0.004709017463028431, + "loss_iou": 0.68359375, + "loss_num": 0.059814453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 196898752, + "step": 2973 + }, + { + "epoch": 0.27837319230589225, + "grad_norm": 19.054615020751953, + "learning_rate": 5e-05, + "loss": 1.4841, + "num_input_tokens_seen": 196964800, + "step": 2974 + }, + { + "epoch": 0.27837319230589225, + "loss": 1.5721250772476196, + "loss_ce": 0.0047422437928617, + "loss_iou": 0.70703125, + "loss_num": 0.03076171875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 196964800, + "step": 2974 + }, + { + "epoch": 0.278466794589788, + "grad_norm": 20.208951950073242, + "learning_rate": 5e-05, + "loss": 1.3882, + "num_input_tokens_seen": 197031488, + "step": 2975 + }, + { + "epoch": 0.278466794589788, + "loss": 1.6346020698547363, + "loss_ce": 0.007160606794059277, + "loss_iou": 0.65625, + "loss_num": 0.0634765625, + "loss_xval": 1.625, + "num_input_tokens_seen": 197031488, + "step": 2975 + }, + { + "epoch": 0.2785603968736837, + "grad_norm": 38.382999420166016, + "learning_rate": 5e-05, + "loss": 1.2024, + "num_input_tokens_seen": 197097028, + "step": 2976 + }, + { + "epoch": 0.2785603968736837, + "loss": 1.2237555980682373, + "loss_ce": 0.010376797057688236, + "loss_iou": 0.52734375, + "loss_num": 0.032470703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 197097028, + "step": 2976 + }, + { + "epoch": 0.27865399915757944, + "grad_norm": 22.0007266998291, + "learning_rate": 5e-05, + "loss": 1.533, + "num_input_tokens_seen": 197162968, + "step": 2977 + }, + { + "epoch": 0.27865399915757944, + "loss": 1.6800990104675293, + "loss_ce": 0.006270837038755417, + "loss_iou": 0.65625, + "loss_num": 0.07177734375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 197162968, + "step": 2977 + }, + { + "epoch": 0.27874760144147515, + "grad_norm": 27.104589462280273, + "learning_rate": 5e-05, + "loss": 1.2597, + "num_input_tokens_seen": 197229888, + "step": 2978 + }, + { + "epoch": 0.27874760144147515, + "loss": 1.3282208442687988, + "loss_ce": 0.00302558159455657, + "loss_iou": 0.55078125, + "loss_num": 0.0439453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 197229888, + "step": 2978 + }, + { + "epoch": 0.2788412037253709, + "grad_norm": 18.77463150024414, + "learning_rate": 5e-05, + "loss": 1.2428, + "num_input_tokens_seen": 197295440, + "step": 2979 + }, + { + "epoch": 0.2788412037253709, + "loss": 1.28849458694458, + "loss_ce": 0.006878322921693325, + "loss_iou": 0.5625, + "loss_num": 0.03125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 197295440, + "step": 2979 + }, + { + "epoch": 0.2789348060092666, + "grad_norm": 19.471467971801758, + "learning_rate": 5e-05, + "loss": 1.4861, + "num_input_tokens_seen": 197361704, + "step": 2980 + }, + { + "epoch": 0.2789348060092666, + "loss": 1.2765750885009766, + "loss_ce": 0.005090806633234024, + "loss_iou": 0.55078125, + "loss_num": 0.033935546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 197361704, + "step": 2980 + }, + { + "epoch": 0.27902840829316233, + "grad_norm": 29.813678741455078, + "learning_rate": 5e-05, + "loss": 1.3685, + "num_input_tokens_seen": 197428532, + "step": 2981 + }, + { + "epoch": 0.27902840829316233, + "loss": 1.3627123832702637, + "loss_ce": 0.006755331996828318, + "loss_iou": 0.578125, + "loss_num": 0.0400390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 197428532, + "step": 2981 + }, + { + "epoch": 0.2791220105770581, + "grad_norm": 28.789997100830078, + "learning_rate": 5e-05, + "loss": 1.5348, + "num_input_tokens_seen": 197494760, + "step": 2982 + }, + { + "epoch": 0.2791220105770581, + "loss": 1.7443268299102783, + "loss_ce": 0.003115879837423563, + "loss_iou": 0.7109375, + "loss_num": 0.0634765625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 197494760, + "step": 2982 + }, + { + "epoch": 0.2792156128609538, + "grad_norm": 27.926149368286133, + "learning_rate": 5e-05, + "loss": 1.3637, + "num_input_tokens_seen": 197561264, + "step": 2983 + }, + { + "epoch": 0.2792156128609538, + "loss": 1.33914315700531, + "loss_ce": 0.007111984305083752, + "loss_iou": 0.52734375, + "loss_num": 0.055908203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 197561264, + "step": 2983 + }, + { + "epoch": 0.2793092151448495, + "grad_norm": 27.39906883239746, + "learning_rate": 5e-05, + "loss": 1.3701, + "num_input_tokens_seen": 197628624, + "step": 2984 + }, + { + "epoch": 0.2793092151448495, + "loss": 1.346161127090454, + "loss_ce": 0.003387751057744026, + "loss_iou": 0.55078125, + "loss_num": 0.048828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 197628624, + "step": 2984 + }, + { + "epoch": 0.2794028174287453, + "grad_norm": 23.686147689819336, + "learning_rate": 5e-05, + "loss": 1.147, + "num_input_tokens_seen": 197695456, + "step": 2985 + }, + { + "epoch": 0.2794028174287453, + "loss": 0.9516488313674927, + "loss_ce": 0.0029184240847826004, + "loss_iou": 0.390625, + "loss_num": 0.033447265625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 197695456, + "step": 2985 + }, + { + "epoch": 0.279496419712641, + "grad_norm": 41.54317092895508, + "learning_rate": 5e-05, + "loss": 1.2913, + "num_input_tokens_seen": 197760268, + "step": 2986 + }, + { + "epoch": 0.279496419712641, + "loss": 1.451521396636963, + "loss_ce": 0.004744125995784998, + "loss_iou": 0.6015625, + "loss_num": 0.048095703125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 197760268, + "step": 2986 + }, + { + "epoch": 0.2795900219965367, + "grad_norm": 30.887428283691406, + "learning_rate": 5e-05, + "loss": 1.2778, + "num_input_tokens_seen": 197826376, + "step": 2987 + }, + { + "epoch": 0.2795900219965367, + "loss": 1.0921101570129395, + "loss_ce": 0.006050621159374714, + "loss_iou": 0.46484375, + "loss_num": 0.031005859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 197826376, + "step": 2987 + }, + { + "epoch": 0.2796836242804324, + "grad_norm": 54.29893112182617, + "learning_rate": 5e-05, + "loss": 1.4286, + "num_input_tokens_seen": 197893984, + "step": 2988 + }, + { + "epoch": 0.2796836242804324, + "loss": 1.2299041748046875, + "loss_ce": 0.0028534168377518654, + "loss_iou": 0.5, + "loss_num": 0.04541015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 197893984, + "step": 2988 + }, + { + "epoch": 0.2797772265643282, + "grad_norm": 12.72755241394043, + "learning_rate": 5e-05, + "loss": 1.3166, + "num_input_tokens_seen": 197959892, + "step": 2989 + }, + { + "epoch": 0.2797772265643282, + "loss": 1.489954948425293, + "loss_ce": 0.007533114403486252, + "loss_iou": 0.53125, + "loss_num": 0.08447265625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 197959892, + "step": 2989 + }, + { + "epoch": 0.2798708288482239, + "grad_norm": 16.953954696655273, + "learning_rate": 5e-05, + "loss": 1.0609, + "num_input_tokens_seen": 198027220, + "step": 2990 + }, + { + "epoch": 0.2798708288482239, + "loss": 0.993353545665741, + "loss_ce": 0.0011660554446280003, + "loss_iou": 0.431640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 198027220, + "step": 2990 + }, + { + "epoch": 0.2799644311321196, + "grad_norm": 20.705690383911133, + "learning_rate": 5e-05, + "loss": 1.1586, + "num_input_tokens_seen": 198093512, + "step": 2991 + }, + { + "epoch": 0.2799644311321196, + "loss": 1.2157834768295288, + "loss_ce": 0.009591442532837391, + "loss_iou": 0.482421875, + "loss_num": 0.04833984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 198093512, + "step": 2991 + }, + { + "epoch": 0.2800580334160154, + "grad_norm": 49.34597396850586, + "learning_rate": 5e-05, + "loss": 1.3502, + "num_input_tokens_seen": 198160084, + "step": 2992 + }, + { + "epoch": 0.2800580334160154, + "loss": 1.2780870199203491, + "loss_ce": 0.0036730102729052305, + "loss_iou": 0.5625, + "loss_num": 0.02978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 198160084, + "step": 2992 + }, + { + "epoch": 0.2801516356999111, + "grad_norm": 24.546768188476562, + "learning_rate": 5e-05, + "loss": 1.5367, + "num_input_tokens_seen": 198226756, + "step": 2993 + }, + { + "epoch": 0.2801516356999111, + "loss": 1.599737286567688, + "loss_ce": 0.009405290707945824, + "loss_iou": 0.6640625, + "loss_num": 0.052490234375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 198226756, + "step": 2993 + }, + { + "epoch": 0.2802452379838068, + "grad_norm": 36.58216857910156, + "learning_rate": 5e-05, + "loss": 1.3061, + "num_input_tokens_seen": 198292888, + "step": 2994 + }, + { + "epoch": 0.2802452379838068, + "loss": 1.2798948287963867, + "loss_ce": 0.0069457050412893295, + "loss_iou": 0.51171875, + "loss_num": 0.049560546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 198292888, + "step": 2994 + }, + { + "epoch": 0.2803388402677025, + "grad_norm": 24.769245147705078, + "learning_rate": 5e-05, + "loss": 1.5064, + "num_input_tokens_seen": 198359084, + "step": 2995 + }, + { + "epoch": 0.2803388402677025, + "loss": 1.5779725313186646, + "loss_ce": 0.003753824159502983, + "loss_iou": 0.6328125, + "loss_num": 0.0625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 198359084, + "step": 2995 + }, + { + "epoch": 0.28043244255159827, + "grad_norm": 49.46518325805664, + "learning_rate": 5e-05, + "loss": 1.2283, + "num_input_tokens_seen": 198424884, + "step": 2996 + }, + { + "epoch": 0.28043244255159827, + "loss": 1.0733356475830078, + "loss_ce": 0.003511441173031926, + "loss_iou": 0.453125, + "loss_num": 0.033203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 198424884, + "step": 2996 + }, + { + "epoch": 0.280526044835494, + "grad_norm": 21.528493881225586, + "learning_rate": 5e-05, + "loss": 1.3659, + "num_input_tokens_seen": 198492052, + "step": 2997 + }, + { + "epoch": 0.280526044835494, + "loss": 1.4436354637145996, + "loss_ce": 0.0071120294742286205, + "loss_iou": 0.59765625, + "loss_num": 0.048095703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 198492052, + "step": 2997 + }, + { + "epoch": 0.2806196471193897, + "grad_norm": 14.494715690612793, + "learning_rate": 5e-05, + "loss": 1.3066, + "num_input_tokens_seen": 198558456, + "step": 2998 + }, + { + "epoch": 0.2806196471193897, + "loss": 1.400672435760498, + "loss_ce": 0.007361851632595062, + "loss_iou": 0.54296875, + "loss_num": 0.061279296875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 198558456, + "step": 2998 + }, + { + "epoch": 0.28071324940328546, + "grad_norm": 18.866601943969727, + "learning_rate": 5e-05, + "loss": 1.34, + "num_input_tokens_seen": 198625364, + "step": 2999 + }, + { + "epoch": 0.28071324940328546, + "loss": 1.5606812238693237, + "loss_ce": 0.007946882396936417, + "loss_iou": 0.60546875, + "loss_num": 0.068359375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 198625364, + "step": 2999 + }, + { + "epoch": 0.28080685168718117, + "grad_norm": 38.40473556518555, + "learning_rate": 5e-05, + "loss": 1.4858, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "eval_seeclick_CIoU": 0.1211421936750412, + "eval_seeclick_GIoU": 0.1370389722287655, + "eval_seeclick_IoU": 0.25907187163829803, + "eval_seeclick_MAE_all": 0.1756228357553482, + "eval_seeclick_MAE_h": 0.07288940250873566, + "eval_seeclick_MAE_w": 0.12167743965983391, + "eval_seeclick_MAE_x_boxes": 0.2986074537038803, + "eval_seeclick_MAE_y_boxes": 0.16409114748239517, + "eval_seeclick_NUM_probability": 0.9997413158416748, + "eval_seeclick_inside_bbox": 0.4072916805744171, + "eval_seeclick_loss": 2.6142280101776123, + "eval_seeclick_loss_ce": 0.014518491458147764, + "eval_seeclick_loss_iou": 0.8895263671875, + "eval_seeclick_loss_num": 0.17076873779296875, + "eval_seeclick_loss_xval": 2.63232421875, + "eval_seeclick_runtime": 73.1545, + "eval_seeclick_samples_per_second": 0.642, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "eval_icons_CIoU": -0.06424631550908089, + "eval_icons_GIoU": 0.05589934252202511, + "eval_icons_IoU": 0.11858085170388222, + "eval_icons_MAE_all": 0.1848635897040367, + "eval_icons_MAE_h": 0.13594580814242363, + "eval_icons_MAE_w": 0.17704887688159943, + "eval_icons_MAE_x_boxes": 0.15803752839565277, + "eval_icons_MAE_y_boxes": 0.09891069307923317, + "eval_icons_NUM_probability": 0.999882310628891, + "eval_icons_inside_bbox": 0.1770833358168602, + "eval_icons_loss": 2.838784694671631, + "eval_icons_loss_ce": 4.0777424146654084e-05, + "eval_icons_loss_iou": 0.958251953125, + "eval_icons_loss_num": 0.1959228515625, + "eval_icons_loss_xval": 2.896484375, + "eval_icons_runtime": 75.6626, + "eval_icons_samples_per_second": 0.661, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "eval_screenspot_CIoU": 0.04999221861362457, + "eval_screenspot_GIoU": 0.07912557261685531, + "eval_screenspot_IoU": 0.20241647958755493, + "eval_screenspot_MAE_all": 0.18727018435796103, + "eval_screenspot_MAE_h": 0.0899810865521431, + "eval_screenspot_MAE_w": 0.13406882186730704, + "eval_screenspot_MAE_x_boxes": 0.2660200943549474, + "eval_screenspot_MAE_y_boxes": 0.1705679049094518, + "eval_screenspot_NUM_probability": 0.9999146262804667, + "eval_screenspot_inside_bbox": 0.39958332975705463, + "eval_screenspot_loss": 2.8131790161132812, + "eval_screenspot_loss_ce": 0.00783678920318683, + "eval_screenspot_loss_iou": 0.9337565104166666, + "eval_screenspot_loss_num": 0.18758138020833334, + "eval_screenspot_loss_xval": 2.8033854166666665, + "eval_screenspot_runtime": 128.2036, + "eval_screenspot_samples_per_second": 0.694, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "eval_compot_CIoU": -0.06844502128660679, + "eval_compot_GIoU": -0.005227629095315933, + "eval_compot_IoU": 0.09907376766204834, + "eval_compot_MAE_all": 0.187107652425766, + "eval_compot_MAE_h": 0.09452997334301472, + "eval_compot_MAE_w": 0.16281145438551903, + "eval_compot_MAE_x_boxes": 0.16504347324371338, + "eval_compot_MAE_y_boxes": 0.17207197099924088, + "eval_compot_NUM_probability": 0.9998281896114349, + "eval_compot_inside_bbox": 0.1736111119389534, + "eval_compot_loss": 3.0248312950134277, + "eval_compot_loss_ce": 0.0022454506251960993, + "eval_compot_loss_iou": 1.035888671875, + "eval_compot_loss_num": 0.2079315185546875, + "eval_compot_loss_xval": 3.11083984375, + "eval_compot_runtime": 69.8459, + "eval_compot_samples_per_second": 0.716, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "eval_custom_ui_MAE_all": 0.14128626137971878, + "eval_custom_ui_MAE_x": 0.114876639097929, + "eval_custom_ui_MAE_y": 0.16769587993621826, + "eval_custom_ui_NUM_probability": 0.9999775290489197, + "eval_custom_ui_loss": 0.8501137495040894, + "eval_custom_ui_loss_ce": 0.19192413985729218, + "eval_custom_ui_loss_num": 0.14447021484375, + "eval_custom_ui_loss_xval": 0.7225341796875, + "eval_custom_ui_runtime": 58.6663, + "eval_custom_ui_samples_per_second": 0.852, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.28080685168718117, + "loss": 0.904617190361023, + "loss_ce": 0.21296684443950653, + "loss_iou": 0.0, + "loss_num": 0.138671875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 198691704, + "step": 3000 + }, + { + "epoch": 0.2809004539710769, + "grad_norm": 22.667634963989258, + "learning_rate": 5e-05, + "loss": 1.5151, + "num_input_tokens_seen": 198757224, + "step": 3001 + }, + { + "epoch": 0.2809004539710769, + "loss": 1.5522687435150146, + "loss_ce": 0.005393712781369686, + "loss_iou": 0.6328125, + "loss_num": 0.054931640625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 198757224, + "step": 3001 + }, + { + "epoch": 0.28099405625497265, + "grad_norm": 16.118173599243164, + "learning_rate": 5e-05, + "loss": 1.435, + "num_input_tokens_seen": 198823776, + "step": 3002 + }, + { + "epoch": 0.28099405625497265, + "loss": 1.3930273056030273, + "loss_ce": 0.00923829060047865, + "loss_iou": 0.5859375, + "loss_num": 0.042236328125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 198823776, + "step": 3002 + }, + { + "epoch": 0.28108765853886836, + "grad_norm": 39.46125793457031, + "learning_rate": 5e-05, + "loss": 1.2334, + "num_input_tokens_seen": 198889656, + "step": 3003 + }, + { + "epoch": 0.28108765853886836, + "loss": 1.145675778388977, + "loss_ce": 0.004074192140251398, + "loss_iou": 0.50390625, + "loss_num": 0.02685546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 198889656, + "step": 3003 + }, + { + "epoch": 0.28118126082276407, + "grad_norm": 23.5291748046875, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 198956604, + "step": 3004 + }, + { + "epoch": 0.28118126082276407, + "loss": 1.3669800758361816, + "loss_ce": 0.004675284028053284, + "loss_iou": 0.55859375, + "loss_num": 0.048828125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 198956604, + "step": 3004 + }, + { + "epoch": 0.2812748631066598, + "grad_norm": 24.502634048461914, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 199022784, + "step": 3005 + }, + { + "epoch": 0.2812748631066598, + "loss": 1.472860336303711, + "loss_ce": 0.01070205494761467, + "loss_iou": 0.5859375, + "loss_num": 0.05810546875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 199022784, + "step": 3005 + }, + { + "epoch": 0.28136846539055554, + "grad_norm": 20.00545883178711, + "learning_rate": 5e-05, + "loss": 1.1291, + "num_input_tokens_seen": 199089608, + "step": 3006 + }, + { + "epoch": 0.28136846539055554, + "loss": 1.335639476776123, + "loss_ce": 0.005561283323913813, + "loss_iou": 0.55078125, + "loss_num": 0.045654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 199089608, + "step": 3006 + }, + { + "epoch": 0.28146206767445126, + "grad_norm": 25.719745635986328, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 199156128, + "step": 3007 + }, + { + "epoch": 0.28146206767445126, + "loss": 1.2904092073440552, + "loss_ce": 0.0032998290844261646, + "loss_iou": 0.57421875, + "loss_num": 0.028076171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 199156128, + "step": 3007 + }, + { + "epoch": 0.28155566995834697, + "grad_norm": 27.325746536254883, + "learning_rate": 5e-05, + "loss": 1.4628, + "num_input_tokens_seen": 199221836, + "step": 3008 + }, + { + "epoch": 0.28155566995834697, + "loss": 1.4212934970855713, + "loss_ce": 0.004301358945667744, + "loss_iou": 0.625, + "loss_num": 0.033447265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 199221836, + "step": 3008 + }, + { + "epoch": 0.28164927224224273, + "grad_norm": 18.342214584350586, + "learning_rate": 5e-05, + "loss": 1.0876, + "num_input_tokens_seen": 199287648, + "step": 3009 + }, + { + "epoch": 0.28164927224224273, + "loss": 1.2151768207550049, + "loss_ce": 0.0037509948015213013, + "loss_iou": 0.51953125, + "loss_num": 0.0341796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 199287648, + "step": 3009 + }, + { + "epoch": 0.28174287452613844, + "grad_norm": 21.493207931518555, + "learning_rate": 5e-05, + "loss": 1.5262, + "num_input_tokens_seen": 199354116, + "step": 3010 + }, + { + "epoch": 0.28174287452613844, + "loss": 1.3997085094451904, + "loss_ce": 0.0022476729936897755, + "loss_iou": 0.6171875, + "loss_num": 0.033203125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 199354116, + "step": 3010 + }, + { + "epoch": 0.28183647681003415, + "grad_norm": 21.33682632446289, + "learning_rate": 5e-05, + "loss": 1.1049, + "num_input_tokens_seen": 199420316, + "step": 3011 + }, + { + "epoch": 0.28183647681003415, + "loss": 0.9362991452217102, + "loss_ce": 0.006611632648855448, + "loss_iou": 0.380859375, + "loss_num": 0.033447265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 199420316, + "step": 3011 + }, + { + "epoch": 0.2819300790939299, + "grad_norm": 32.78297805786133, + "learning_rate": 5e-05, + "loss": 1.318, + "num_input_tokens_seen": 199487272, + "step": 3012 + }, + { + "epoch": 0.2819300790939299, + "loss": 1.1804206371307373, + "loss_ce": 0.00659254239872098, + "loss_iou": 0.5, + "loss_num": 0.033935546875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 199487272, + "step": 3012 + }, + { + "epoch": 0.28202368137782563, + "grad_norm": 26.72673797607422, + "learning_rate": 5e-05, + "loss": 1.2979, + "num_input_tokens_seen": 199553644, + "step": 3013 + }, + { + "epoch": 0.28202368137782563, + "loss": 1.1061081886291504, + "loss_ce": 0.00747529324144125, + "loss_iou": 0.466796875, + "loss_num": 0.033203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 199553644, + "step": 3013 + }, + { + "epoch": 0.28211728366172134, + "grad_norm": 25.93937110900879, + "learning_rate": 5e-05, + "loss": 1.4116, + "num_input_tokens_seen": 199619412, + "step": 3014 + }, + { + "epoch": 0.28211728366172134, + "loss": 1.2846951484680176, + "loss_ce": 0.004909959621727467, + "loss_iou": 0.53515625, + "loss_num": 0.041748046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 199619412, + "step": 3014 + }, + { + "epoch": 0.28221088594561705, + "grad_norm": 33.346580505371094, + "learning_rate": 5e-05, + "loss": 1.5906, + "num_input_tokens_seen": 199686320, + "step": 3015 + }, + { + "epoch": 0.28221088594561705, + "loss": 1.7591780424118042, + "loss_ce": 0.004783453419804573, + "loss_iou": 0.73828125, + "loss_num": 0.056396484375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 199686320, + "step": 3015 + }, + { + "epoch": 0.2823044882295128, + "grad_norm": 26.67934799194336, + "learning_rate": 5e-05, + "loss": 1.5406, + "num_input_tokens_seen": 199752700, + "step": 3016 + }, + { + "epoch": 0.2823044882295128, + "loss": 1.3743374347686768, + "loss_ce": 0.003243792802095413, + "loss_iou": 0.6015625, + "loss_num": 0.033203125, + "loss_xval": 1.375, + "num_input_tokens_seen": 199752700, + "step": 3016 + }, + { + "epoch": 0.28239809051340853, + "grad_norm": 75.38923645019531, + "learning_rate": 5e-05, + "loss": 1.0007, + "num_input_tokens_seen": 199817348, + "step": 3017 + }, + { + "epoch": 0.28239809051340853, + "loss": 0.8193593621253967, + "loss_ce": 0.007103499956429005, + "loss_iou": 0.33203125, + "loss_num": 0.0296630859375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 199817348, + "step": 3017 + }, + { + "epoch": 0.28249169279730424, + "grad_norm": 25.729848861694336, + "learning_rate": 5e-05, + "loss": 1.4244, + "num_input_tokens_seen": 199883940, + "step": 3018 + }, + { + "epoch": 0.28249169279730424, + "loss": 1.4630744457244873, + "loss_ce": 0.006043207366019487, + "loss_iou": 0.5859375, + "loss_num": 0.056884765625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 199883940, + "step": 3018 + }, + { + "epoch": 0.2825852950812, + "grad_norm": 76.04930114746094, + "learning_rate": 5e-05, + "loss": 1.4571, + "num_input_tokens_seen": 199950904, + "step": 3019 + }, + { + "epoch": 0.2825852950812, + "loss": 1.5464365482330322, + "loss_ce": 0.0054209185764193535, + "loss_iou": 0.6640625, + "loss_num": 0.042724609375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 199950904, + "step": 3019 + }, + { + "epoch": 0.2826788973650957, + "grad_norm": 105.80778503417969, + "learning_rate": 5e-05, + "loss": 1.5398, + "num_input_tokens_seen": 200017584, + "step": 3020 + }, + { + "epoch": 0.2826788973650957, + "loss": 1.6474591493606567, + "loss_ce": 0.0034162087831646204, + "loss_iou": 0.671875, + "loss_num": 0.060546875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 200017584, + "step": 3020 + }, + { + "epoch": 0.2827724996489914, + "grad_norm": 24.542709350585938, + "learning_rate": 5e-05, + "loss": 1.4423, + "num_input_tokens_seen": 200083628, + "step": 3021 + }, + { + "epoch": 0.2827724996489914, + "loss": 1.3654754161834717, + "loss_ce": 0.008297751657664776, + "loss_iou": 0.58984375, + "loss_num": 0.035888671875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 200083628, + "step": 3021 + }, + { + "epoch": 0.28286610193288714, + "grad_norm": 23.94529151916504, + "learning_rate": 5e-05, + "loss": 1.3947, + "num_input_tokens_seen": 200150140, + "step": 3022 + }, + { + "epoch": 0.28286610193288714, + "loss": 1.3242911100387573, + "loss_ce": 0.003490261733531952, + "loss_iou": 0.61328125, + "loss_num": 0.01904296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 200150140, + "step": 3022 + }, + { + "epoch": 0.2829597042167829, + "grad_norm": 26.056406021118164, + "learning_rate": 5e-05, + "loss": 1.2948, + "num_input_tokens_seen": 200216752, + "step": 3023 + }, + { + "epoch": 0.2829597042167829, + "loss": 1.2260204553604126, + "loss_ce": 0.0023876808118075132, + "loss_iou": 0.53125, + "loss_num": 0.031982421875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 200216752, + "step": 3023 + }, + { + "epoch": 0.2830533065006786, + "grad_norm": 37.56977081298828, + "learning_rate": 5e-05, + "loss": 1.694, + "num_input_tokens_seen": 200284168, + "step": 3024 + }, + { + "epoch": 0.2830533065006786, + "loss": 1.7354222536087036, + "loss_ce": 0.0030004363507032394, + "loss_iou": 0.72265625, + "loss_num": 0.057373046875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 200284168, + "step": 3024 + }, + { + "epoch": 0.2831469087845743, + "grad_norm": 21.75420570373535, + "learning_rate": 5e-05, + "loss": 1.2698, + "num_input_tokens_seen": 200350508, + "step": 3025 + }, + { + "epoch": 0.2831469087845743, + "loss": 1.2544282674789429, + "loss_ce": 0.004428272135555744, + "loss_iou": 0.46875, + "loss_num": 0.062255859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 200350508, + "step": 3025 + }, + { + "epoch": 0.2832405110684701, + "grad_norm": 27.25312614440918, + "learning_rate": 5e-05, + "loss": 1.4511, + "num_input_tokens_seen": 200417128, + "step": 3026 + }, + { + "epoch": 0.2832405110684701, + "loss": 1.2957229614257812, + "loss_ce": 0.00812535360455513, + "loss_iou": 0.546875, + "loss_num": 0.038330078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 200417128, + "step": 3026 + }, + { + "epoch": 0.2833341133523658, + "grad_norm": 31.485076904296875, + "learning_rate": 5e-05, + "loss": 1.3466, + "num_input_tokens_seen": 200483580, + "step": 3027 + }, + { + "epoch": 0.2833341133523658, + "loss": 1.3404892683029175, + "loss_ce": 0.006993155926465988, + "loss_iou": 0.5703125, + "loss_num": 0.038818359375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 200483580, + "step": 3027 + }, + { + "epoch": 0.2834277156362615, + "grad_norm": 23.445817947387695, + "learning_rate": 5e-05, + "loss": 1.2481, + "num_input_tokens_seen": 200548228, + "step": 3028 + }, + { + "epoch": 0.2834277156362615, + "loss": 1.1979817152023315, + "loss_ce": 0.006087195128202438, + "loss_iou": 0.47265625, + "loss_num": 0.0498046875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 200548228, + "step": 3028 + }, + { + "epoch": 0.2835213179201573, + "grad_norm": 9.60848331451416, + "learning_rate": 5e-05, + "loss": 1.07, + "num_input_tokens_seen": 200614080, + "step": 3029 + }, + { + "epoch": 0.2835213179201573, + "loss": 1.2597129344940186, + "loss_ce": 0.009712908416986465, + "loss_iou": 0.50390625, + "loss_num": 0.04833984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 200614080, + "step": 3029 + }, + { + "epoch": 0.283614920204053, + "grad_norm": 56.132328033447266, + "learning_rate": 5e-05, + "loss": 1.3437, + "num_input_tokens_seen": 200681156, + "step": 3030 + }, + { + "epoch": 0.283614920204053, + "loss": 1.1957062482833862, + "loss_ce": 0.007229625713080168, + "loss_iou": 0.484375, + "loss_num": 0.043701171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 200681156, + "step": 3030 + }, + { + "epoch": 0.2837085224879487, + "grad_norm": 29.766399383544922, + "learning_rate": 5e-05, + "loss": 1.4205, + "num_input_tokens_seen": 200747064, + "step": 3031 + }, + { + "epoch": 0.2837085224879487, + "loss": 1.3620655536651611, + "loss_ce": 0.0026904933620244265, + "loss_iou": 0.48828125, + "loss_num": 0.07666015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 200747064, + "step": 3031 + }, + { + "epoch": 0.2838021247718444, + "grad_norm": 36.39285659790039, + "learning_rate": 5e-05, + "loss": 1.3372, + "num_input_tokens_seen": 200813232, + "step": 3032 + }, + { + "epoch": 0.2838021247718444, + "loss": 1.2890684604644775, + "loss_ce": 0.004156413953751326, + "loss_iou": 0.55859375, + "loss_num": 0.03369140625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 200813232, + "step": 3032 + }, + { + "epoch": 0.2838957270557402, + "grad_norm": 21.098520278930664, + "learning_rate": 5e-05, + "loss": 1.7289, + "num_input_tokens_seen": 200879408, + "step": 3033 + }, + { + "epoch": 0.2838957270557402, + "loss": 1.7756386995315552, + "loss_ce": 0.006107440683990717, + "loss_iou": 0.734375, + "loss_num": 0.06005859375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 200879408, + "step": 3033 + }, + { + "epoch": 0.2839893293396359, + "grad_norm": 24.60450553894043, + "learning_rate": 5e-05, + "loss": 1.5552, + "num_input_tokens_seen": 200945764, + "step": 3034 + }, + { + "epoch": 0.2839893293396359, + "loss": 1.7291457653045654, + "loss_ce": 0.004536377266049385, + "loss_iou": 0.6796875, + "loss_num": 0.0732421875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 200945764, + "step": 3034 + }, + { + "epoch": 0.2840829316235316, + "grad_norm": 32.24265670776367, + "learning_rate": 5e-05, + "loss": 1.3851, + "num_input_tokens_seen": 201012184, + "step": 3035 + }, + { + "epoch": 0.2840829316235316, + "loss": 1.4628403186798096, + "loss_ce": 0.002879311330616474, + "loss_iou": 0.6171875, + "loss_num": 0.0458984375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 201012184, + "step": 3035 + }, + { + "epoch": 0.28417653390742736, + "grad_norm": 22.662155151367188, + "learning_rate": 5e-05, + "loss": 1.2645, + "num_input_tokens_seen": 201078792, + "step": 3036 + }, + { + "epoch": 0.28417653390742736, + "loss": 1.3859742879867554, + "loss_ce": 0.006091486196964979, + "loss_iou": 0.55859375, + "loss_num": 0.05224609375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 201078792, + "step": 3036 + }, + { + "epoch": 0.2842701361913231, + "grad_norm": 62.40760803222656, + "learning_rate": 5e-05, + "loss": 1.3122, + "num_input_tokens_seen": 201144312, + "step": 3037 + }, + { + "epoch": 0.2842701361913231, + "loss": 1.3730086088180542, + "loss_ce": 0.0038679810240864754, + "loss_iou": 0.58203125, + "loss_num": 0.040771484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 201144312, + "step": 3037 + }, + { + "epoch": 0.2843637384752188, + "grad_norm": 19.97221565246582, + "learning_rate": 5e-05, + "loss": 1.2313, + "num_input_tokens_seen": 201210704, + "step": 3038 + }, + { + "epoch": 0.2843637384752188, + "loss": 1.2948195934295654, + "loss_ce": 0.005757001228630543, + "loss_iou": 0.5390625, + "loss_num": 0.04296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 201210704, + "step": 3038 + }, + { + "epoch": 0.2844573407591145, + "grad_norm": 18.19036293029785, + "learning_rate": 5e-05, + "loss": 1.261, + "num_input_tokens_seen": 201277684, + "step": 3039 + }, + { + "epoch": 0.2844573407591145, + "loss": 1.2203940153121948, + "loss_ce": 0.00262056733481586, + "loss_iou": 0.52734375, + "loss_num": 0.032470703125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 201277684, + "step": 3039 + }, + { + "epoch": 0.28455094304301026, + "grad_norm": 44.580387115478516, + "learning_rate": 5e-05, + "loss": 1.2232, + "num_input_tokens_seen": 201343984, + "step": 3040 + }, + { + "epoch": 0.28455094304301026, + "loss": 1.2985180616378784, + "loss_ce": 0.006037507671862841, + "loss_iou": 0.53125, + "loss_num": 0.046142578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 201343984, + "step": 3040 + }, + { + "epoch": 0.284644545326906, + "grad_norm": 21.293792724609375, + "learning_rate": 5e-05, + "loss": 1.9223, + "num_input_tokens_seen": 201410180, + "step": 3041 + }, + { + "epoch": 0.284644545326906, + "loss": 1.825531244277954, + "loss_ce": 0.008148333057761192, + "loss_iou": 0.7578125, + "loss_num": 0.059814453125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 201410180, + "step": 3041 + }, + { + "epoch": 0.2847381476108017, + "grad_norm": 24.56401252746582, + "learning_rate": 5e-05, + "loss": 1.665, + "num_input_tokens_seen": 201476288, + "step": 3042 + }, + { + "epoch": 0.2847381476108017, + "loss": 1.6926859617233276, + "loss_ce": 0.010068817995488644, + "loss_iou": 0.7109375, + "loss_num": 0.05224609375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 201476288, + "step": 3042 + }, + { + "epoch": 0.28483174989469745, + "grad_norm": 67.64642333984375, + "learning_rate": 5e-05, + "loss": 0.9365, + "num_input_tokens_seen": 201541224, + "step": 3043 + }, + { + "epoch": 0.28483174989469745, + "loss": 1.1663177013397217, + "loss_ce": 0.006253093481063843, + "loss_iou": 0.48828125, + "loss_num": 0.036865234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 201541224, + "step": 3043 + }, + { + "epoch": 0.28492535217859316, + "grad_norm": 18.67448616027832, + "learning_rate": 5e-05, + "loss": 1.2049, + "num_input_tokens_seen": 201607664, + "step": 3044 + }, + { + "epoch": 0.28492535217859316, + "loss": 1.2268407344818115, + "loss_ce": 0.006259709130972624, + "loss_iou": 0.5078125, + "loss_num": 0.041015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 201607664, + "step": 3044 + }, + { + "epoch": 0.28501895446248887, + "grad_norm": 26.925634384155273, + "learning_rate": 5e-05, + "loss": 1.314, + "num_input_tokens_seen": 201674108, + "step": 3045 + }, + { + "epoch": 0.28501895446248887, + "loss": 1.528472900390625, + "loss_ce": 0.005035434849560261, + "loss_iou": 0.62890625, + "loss_num": 0.053466796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 201674108, + "step": 3045 + }, + { + "epoch": 0.28511255674638464, + "grad_norm": 30.826522827148438, + "learning_rate": 5e-05, + "loss": 1.5354, + "num_input_tokens_seen": 201740684, + "step": 3046 + }, + { + "epoch": 0.28511255674638464, + "loss": 1.604750633239746, + "loss_ce": 0.0075826384127140045, + "loss_iou": 0.6640625, + "loss_num": 0.05419921875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 201740684, + "step": 3046 + }, + { + "epoch": 0.28520615903028035, + "grad_norm": 21.951374053955078, + "learning_rate": 5e-05, + "loss": 1.4433, + "num_input_tokens_seen": 201807480, + "step": 3047 + }, + { + "epoch": 0.28520615903028035, + "loss": 1.377296805381775, + "loss_ce": 0.008156189695000648, + "loss_iou": 0.59375, + "loss_num": 0.0361328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 201807480, + "step": 3047 + }, + { + "epoch": 0.28529976131417606, + "grad_norm": 39.95161437988281, + "learning_rate": 5e-05, + "loss": 1.5745, + "num_input_tokens_seen": 201874104, + "step": 3048 + }, + { + "epoch": 0.28529976131417606, + "loss": 1.4472811222076416, + "loss_ce": 0.0029452352318912745, + "loss_iou": 0.60546875, + "loss_num": 0.047119140625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 201874104, + "step": 3048 + }, + { + "epoch": 0.28539336359807177, + "grad_norm": 40.374271392822266, + "learning_rate": 5e-05, + "loss": 1.5279, + "num_input_tokens_seen": 201940068, + "step": 3049 + }, + { + "epoch": 0.28539336359807177, + "loss": 1.4617282152175903, + "loss_ce": 0.006161773111671209, + "loss_iou": 0.57421875, + "loss_num": 0.061767578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 201940068, + "step": 3049 + }, + { + "epoch": 0.28548696588196754, + "grad_norm": 20.22443962097168, + "learning_rate": 5e-05, + "loss": 1.4602, + "num_input_tokens_seen": 202006508, + "step": 3050 + }, + { + "epoch": 0.28548696588196754, + "loss": 1.4855684041976929, + "loss_ce": 0.004123128484934568, + "loss_iou": 0.6484375, + "loss_num": 0.037109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 202006508, + "step": 3050 + }, + { + "epoch": 0.28558056816586325, + "grad_norm": 34.23317337036133, + "learning_rate": 5e-05, + "loss": 1.28, + "num_input_tokens_seen": 202072496, + "step": 3051 + }, + { + "epoch": 0.28558056816586325, + "loss": 1.418210506439209, + "loss_ce": 0.006101028528064489, + "loss_iou": 0.5703125, + "loss_num": 0.055419921875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 202072496, + "step": 3051 + }, + { + "epoch": 0.28567417044975896, + "grad_norm": 37.79703140258789, + "learning_rate": 5e-05, + "loss": 1.4676, + "num_input_tokens_seen": 202138636, + "step": 3052 + }, + { + "epoch": 0.28567417044975896, + "loss": 1.6764185428619385, + "loss_ce": 0.0055201370269060135, + "loss_iou": 0.6796875, + "loss_num": 0.0625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 202138636, + "step": 3052 + }, + { + "epoch": 0.2857677727336547, + "grad_norm": 21.096168518066406, + "learning_rate": 5e-05, + "loss": 1.6136, + "num_input_tokens_seen": 202204992, + "step": 3053 + }, + { + "epoch": 0.2857677727336547, + "loss": 1.8560161590576172, + "loss_ce": 0.006406885571777821, + "loss_iou": 0.75, + "loss_num": 0.06884765625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 202204992, + "step": 3053 + }, + { + "epoch": 0.28586137501755043, + "grad_norm": 18.430843353271484, + "learning_rate": 5e-05, + "loss": 1.2172, + "num_input_tokens_seen": 202271552, + "step": 3054 + }, + { + "epoch": 0.28586137501755043, + "loss": 1.1434144973754883, + "loss_ce": 0.00523093156516552, + "loss_iou": 0.48046875, + "loss_num": 0.035400390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 202271552, + "step": 3054 + }, + { + "epoch": 0.28595497730144614, + "grad_norm": 17.02061653137207, + "learning_rate": 5e-05, + "loss": 1.2744, + "num_input_tokens_seen": 202337792, + "step": 3055 + }, + { + "epoch": 0.28595497730144614, + "loss": 1.2045364379882812, + "loss_ce": 0.008735625073313713, + "loss_iou": 0.49609375, + "loss_num": 0.041015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 202337792, + "step": 3055 + }, + { + "epoch": 0.28604857958534186, + "grad_norm": 33.488468170166016, + "learning_rate": 5e-05, + "loss": 1.1886, + "num_input_tokens_seen": 202405424, + "step": 3056 + }, + { + "epoch": 0.28604857958534186, + "loss": 1.312138557434082, + "loss_ce": 0.0045213233679533005, + "loss_iou": 0.54296875, + "loss_num": 0.0439453125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 202405424, + "step": 3056 + }, + { + "epoch": 0.2861421818692376, + "grad_norm": 23.083343505859375, + "learning_rate": 5e-05, + "loss": 1.4977, + "num_input_tokens_seen": 202471848, + "step": 3057 + }, + { + "epoch": 0.2861421818692376, + "loss": 1.3633100986480713, + "loss_ce": 0.002958590630441904, + "loss_iou": 0.60546875, + "loss_num": 0.029541015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 202471848, + "step": 3057 + }, + { + "epoch": 0.28623578415313333, + "grad_norm": 21.281354904174805, + "learning_rate": 5e-05, + "loss": 1.2358, + "num_input_tokens_seen": 202538592, + "step": 3058 + }, + { + "epoch": 0.28623578415313333, + "loss": 1.132489562034607, + "loss_ce": 0.004559872671961784, + "loss_iou": 0.47265625, + "loss_num": 0.036376953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 202538592, + "step": 3058 + }, + { + "epoch": 0.28632938643702904, + "grad_norm": 20.42490005493164, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 202605584, + "step": 3059 + }, + { + "epoch": 0.28632938643702904, + "loss": 1.2003170251846313, + "loss_ce": 0.002074801828712225, + "loss_iou": 0.52734375, + "loss_num": 0.029052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 202605584, + "step": 3059 + }, + { + "epoch": 0.2864229887209248, + "grad_norm": 35.977149963378906, + "learning_rate": 5e-05, + "loss": 1.1665, + "num_input_tokens_seen": 202670260, + "step": 3060 + }, + { + "epoch": 0.2864229887209248, + "loss": 1.1984047889709473, + "loss_ce": 0.00504543911665678, + "loss_iou": 0.478515625, + "loss_num": 0.04736328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 202670260, + "step": 3060 + }, + { + "epoch": 0.2865165910048205, + "grad_norm": 30.093826293945312, + "learning_rate": 5e-05, + "loss": 1.4555, + "num_input_tokens_seen": 202735752, + "step": 3061 + }, + { + "epoch": 0.2865165910048205, + "loss": 1.424020528793335, + "loss_ce": 0.0070283068343997, + "loss_iou": 0.5234375, + "loss_num": 0.07373046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 202735752, + "step": 3061 + }, + { + "epoch": 0.28661019328871623, + "grad_norm": 37.46656799316406, + "learning_rate": 5e-05, + "loss": 1.4962, + "num_input_tokens_seen": 202802616, + "step": 3062 + }, + { + "epoch": 0.28661019328871623, + "loss": 1.8160855770111084, + "loss_ce": 0.003585491795092821, + "loss_iou": 0.76171875, + "loss_num": 0.0576171875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 202802616, + "step": 3062 + }, + { + "epoch": 0.286703795572612, + "grad_norm": 19.977628707885742, + "learning_rate": 5e-05, + "loss": 1.5627, + "num_input_tokens_seen": 202869176, + "step": 3063 + }, + { + "epoch": 0.286703795572612, + "loss": 1.564035177230835, + "loss_ce": 0.007882889360189438, + "loss_iou": 0.65234375, + "loss_num": 0.05078125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 202869176, + "step": 3063 + }, + { + "epoch": 0.2867973978565077, + "grad_norm": 185.08062744140625, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 202935300, + "step": 3064 + }, + { + "epoch": 0.2867973978565077, + "loss": 1.2464423179626465, + "loss_ce": 0.004254757426679134, + "loss_iou": 0.54296875, + "loss_num": 0.0311279296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 202935300, + "step": 3064 + }, + { + "epoch": 0.2868910001404034, + "grad_norm": 23.055692672729492, + "learning_rate": 5e-05, + "loss": 1.3364, + "num_input_tokens_seen": 203001632, + "step": 3065 + }, + { + "epoch": 0.2868910001404034, + "loss": 1.378543496131897, + "loss_ce": 0.006473171524703503, + "loss_iou": 0.59765625, + "loss_num": 0.03466796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 203001632, + "step": 3065 + }, + { + "epoch": 0.28698460242429913, + "grad_norm": 30.766271591186523, + "learning_rate": 5e-05, + "loss": 1.2575, + "num_input_tokens_seen": 203068964, + "step": 3066 + }, + { + "epoch": 0.28698460242429913, + "loss": 1.3517169952392578, + "loss_ce": 0.0030842546839267015, + "loss_iou": 0.58984375, + "loss_num": 0.034423828125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 203068964, + "step": 3066 + }, + { + "epoch": 0.2870782047081949, + "grad_norm": 35.7118034362793, + "learning_rate": 5e-05, + "loss": 1.3582, + "num_input_tokens_seen": 203135280, + "step": 3067 + }, + { + "epoch": 0.2870782047081949, + "loss": 1.181583285331726, + "loss_ce": 0.006290363147854805, + "loss_iou": 0.5234375, + "loss_num": 0.0263671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 203135280, + "step": 3067 + }, + { + "epoch": 0.2871718069920906, + "grad_norm": 17.745744705200195, + "learning_rate": 5e-05, + "loss": 1.3908, + "num_input_tokens_seen": 203200880, + "step": 3068 + }, + { + "epoch": 0.2871718069920906, + "loss": 1.23586905002594, + "loss_ce": 0.00552239827811718, + "loss_iou": 0.482421875, + "loss_num": 0.053466796875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 203200880, + "step": 3068 + }, + { + "epoch": 0.2872654092759863, + "grad_norm": 21.535432815551758, + "learning_rate": 5e-05, + "loss": 1.3066, + "num_input_tokens_seen": 203267708, + "step": 3069 + }, + { + "epoch": 0.2872654092759863, + "loss": 1.5727143287658691, + "loss_ce": 0.008261275477707386, + "loss_iou": 0.6171875, + "loss_num": 0.06591796875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 203267708, + "step": 3069 + }, + { + "epoch": 0.2873590115598821, + "grad_norm": 24.41970443725586, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 203334544, + "step": 3070 + }, + { + "epoch": 0.2873590115598821, + "loss": 1.2093100547790527, + "loss_ce": 0.005696695763617754, + "loss_iou": 0.5078125, + "loss_num": 0.03759765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 203334544, + "step": 3070 + }, + { + "epoch": 0.2874526138437778, + "grad_norm": 35.85791015625, + "learning_rate": 5e-05, + "loss": 1.4133, + "num_input_tokens_seen": 203401620, + "step": 3071 + }, + { + "epoch": 0.2874526138437778, + "loss": 1.2298303842544556, + "loss_ce": 0.003267889376729727, + "loss_iou": 0.515625, + "loss_num": 0.038818359375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 203401620, + "step": 3071 + }, + { + "epoch": 0.2875462161276735, + "grad_norm": 186.5697784423828, + "learning_rate": 5e-05, + "loss": 1.3013, + "num_input_tokens_seen": 203466636, + "step": 3072 + }, + { + "epoch": 0.2875462161276735, + "loss": 1.560002326965332, + "loss_ce": 0.005314810201525688, + "loss_iou": 0.609375, + "loss_num": 0.06787109375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 203466636, + "step": 3072 + }, + { + "epoch": 0.2876398184115692, + "grad_norm": 21.278427124023438, + "learning_rate": 5e-05, + "loss": 1.1918, + "num_input_tokens_seen": 203532620, + "step": 3073 + }, + { + "epoch": 0.2876398184115692, + "loss": 1.1815638542175293, + "loss_ce": 0.0032802638597786427, + "loss_iou": 0.484375, + "loss_num": 0.041748046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 203532620, + "step": 3073 + }, + { + "epoch": 0.287733420695465, + "grad_norm": 22.485498428344727, + "learning_rate": 5e-05, + "loss": 1.4908, + "num_input_tokens_seen": 203599092, + "step": 3074 + }, + { + "epoch": 0.287733420695465, + "loss": 1.5560481548309326, + "loss_ce": 0.004290410317480564, + "loss_iou": 0.62109375, + "loss_num": 0.061279296875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 203599092, + "step": 3074 + }, + { + "epoch": 0.2878270229793607, + "grad_norm": 37.10167694091797, + "learning_rate": 5e-05, + "loss": 1.7451, + "num_input_tokens_seen": 203666568, + "step": 3075 + }, + { + "epoch": 0.2878270229793607, + "loss": 1.476119041442871, + "loss_ce": 0.003462723223492503, + "loss_iou": 0.6015625, + "loss_num": 0.053955078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 203666568, + "step": 3075 + }, + { + "epoch": 0.2879206252632564, + "grad_norm": 20.182947158813477, + "learning_rate": 5e-05, + "loss": 1.4946, + "num_input_tokens_seen": 203732500, + "step": 3076 + }, + { + "epoch": 0.2879206252632564, + "loss": 1.595193862915039, + "loss_ce": 0.006326563656330109, + "loss_iou": 0.6640625, + "loss_num": 0.05126953125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 203732500, + "step": 3076 + }, + { + "epoch": 0.28801422754715217, + "grad_norm": 30.674283981323242, + "learning_rate": 5e-05, + "loss": 1.2873, + "num_input_tokens_seen": 203799168, + "step": 3077 + }, + { + "epoch": 0.28801422754715217, + "loss": 1.2293314933776855, + "loss_ce": 0.004722068086266518, + "loss_iou": 0.51171875, + "loss_num": 0.040283203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 203799168, + "step": 3077 + }, + { + "epoch": 0.2881078298310479, + "grad_norm": 31.837526321411133, + "learning_rate": 5e-05, + "loss": 1.3774, + "num_input_tokens_seen": 203865784, + "step": 3078 + }, + { + "epoch": 0.2881078298310479, + "loss": 1.2486945390701294, + "loss_ce": 0.0021125266794115305, + "loss_iou": 0.51953125, + "loss_num": 0.040771484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 203865784, + "step": 3078 + }, + { + "epoch": 0.2882014321149436, + "grad_norm": 23.467811584472656, + "learning_rate": 5e-05, + "loss": 1.7556, + "num_input_tokens_seen": 203930564, + "step": 3079 + }, + { + "epoch": 0.2882014321149436, + "loss": 1.7434748411178589, + "loss_ce": 0.0032404691446572542, + "loss_iou": 0.69140625, + "loss_num": 0.07177734375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 203930564, + "step": 3079 + }, + { + "epoch": 0.28829503439883936, + "grad_norm": 28.333711624145508, + "learning_rate": 5e-05, + "loss": 1.1921, + "num_input_tokens_seen": 203997980, + "step": 3080 + }, + { + "epoch": 0.28829503439883936, + "loss": 1.235658884048462, + "loss_ce": 0.004701939411461353, + "loss_iou": 0.515625, + "loss_num": 0.03955078125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 203997980, + "step": 3080 + }, + { + "epoch": 0.28838863668273507, + "grad_norm": 23.09746742248535, + "learning_rate": 5e-05, + "loss": 1.3955, + "num_input_tokens_seen": 204063484, + "step": 3081 + }, + { + "epoch": 0.28838863668273507, + "loss": 1.5260694026947021, + "loss_ce": 0.0075147938914597034, + "loss_iou": 0.59375, + "loss_num": 0.06689453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 204063484, + "step": 3081 + }, + { + "epoch": 0.2884822389666308, + "grad_norm": 32.867923736572266, + "learning_rate": 5e-05, + "loss": 1.5281, + "num_input_tokens_seen": 204129316, + "step": 3082 + }, + { + "epoch": 0.2884822389666308, + "loss": 1.2687959671020508, + "loss_ce": 0.005124174989759922, + "loss_iou": 0.546875, + "loss_num": 0.033935546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 204129316, + "step": 3082 + }, + { + "epoch": 0.2885758412505265, + "grad_norm": 29.352083206176758, + "learning_rate": 5e-05, + "loss": 1.4065, + "num_input_tokens_seen": 204196324, + "step": 3083 + }, + { + "epoch": 0.2885758412505265, + "loss": 1.4482903480529785, + "loss_ce": 0.005419221706688404, + "loss_iou": 0.640625, + "loss_num": 0.03173828125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 204196324, + "step": 3083 + }, + { + "epoch": 0.28866944353442225, + "grad_norm": 12.212352752685547, + "learning_rate": 5e-05, + "loss": 1.0674, + "num_input_tokens_seen": 204263408, + "step": 3084 + }, + { + "epoch": 0.28866944353442225, + "loss": 0.9747363328933716, + "loss_ce": 0.005986327771097422, + "loss_iou": 0.408203125, + "loss_num": 0.0303955078125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 204263408, + "step": 3084 + }, + { + "epoch": 0.28876304581831796, + "grad_norm": 28.14958381652832, + "learning_rate": 5e-05, + "loss": 1.4128, + "num_input_tokens_seen": 204330552, + "step": 3085 + }, + { + "epoch": 0.28876304581831796, + "loss": 1.2859044075012207, + "loss_ce": 0.004654408432543278, + "loss_iou": 0.5234375, + "loss_num": 0.046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 204330552, + "step": 3085 + }, + { + "epoch": 0.2888566481022137, + "grad_norm": 30.277748107910156, + "learning_rate": 5e-05, + "loss": 1.4481, + "num_input_tokens_seen": 204398292, + "step": 3086 + }, + { + "epoch": 0.2888566481022137, + "loss": 1.2599860429763794, + "loss_ce": 0.0016852568369358778, + "loss_iou": 0.54296875, + "loss_num": 0.034423828125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 204398292, + "step": 3086 + }, + { + "epoch": 0.28895025038610944, + "grad_norm": 27.69671058654785, + "learning_rate": 5e-05, + "loss": 1.2157, + "num_input_tokens_seen": 204463368, + "step": 3087 + }, + { + "epoch": 0.28895025038610944, + "loss": 1.235903024673462, + "loss_ce": 0.004442431032657623, + "loss_iou": 0.51953125, + "loss_num": 0.038818359375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 204463368, + "step": 3087 + }, + { + "epoch": 0.28904385267000515, + "grad_norm": 34.46951675415039, + "learning_rate": 5e-05, + "loss": 1.267, + "num_input_tokens_seen": 204529540, + "step": 3088 + }, + { + "epoch": 0.28904385267000515, + "loss": 1.1254793405532837, + "loss_ce": 0.005362133029848337, + "loss_iou": 0.484375, + "loss_num": 0.0303955078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 204529540, + "step": 3088 + }, + { + "epoch": 0.28913745495390086, + "grad_norm": 26.055301666259766, + "learning_rate": 5e-05, + "loss": 1.419, + "num_input_tokens_seen": 204595392, + "step": 3089 + }, + { + "epoch": 0.28913745495390086, + "loss": 1.349999189376831, + "loss_ce": 0.0042960201390087605, + "loss_iou": 0.55078125, + "loss_num": 0.048095703125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 204595392, + "step": 3089 + }, + { + "epoch": 0.28923105723779663, + "grad_norm": 34.675899505615234, + "learning_rate": 5e-05, + "loss": 1.2665, + "num_input_tokens_seen": 204661588, + "step": 3090 + }, + { + "epoch": 0.28923105723779663, + "loss": 1.447137475013733, + "loss_ce": 0.003289783839136362, + "loss_iou": 0.625, + "loss_num": 0.03955078125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 204661588, + "step": 3090 + }, + { + "epoch": 0.28932465952169234, + "grad_norm": 21.149599075317383, + "learning_rate": 5e-05, + "loss": 1.4622, + "num_input_tokens_seen": 204727372, + "step": 3091 + }, + { + "epoch": 0.28932465952169234, + "loss": 1.498487949371338, + "loss_ce": 0.0053239320404827595, + "loss_iou": 0.62890625, + "loss_num": 0.046875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 204727372, + "step": 3091 + }, + { + "epoch": 0.28941826180558805, + "grad_norm": 18.59107780456543, + "learning_rate": 5e-05, + "loss": 1.3873, + "num_input_tokens_seen": 204793768, + "step": 3092 + }, + { + "epoch": 0.28941826180558805, + "loss": 1.2568249702453613, + "loss_ce": 0.008045699447393417, + "loss_iou": 0.48046875, + "loss_num": 0.057373046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 204793768, + "step": 3092 + }, + { + "epoch": 0.28951186408948376, + "grad_norm": 15.865816116333008, + "learning_rate": 5e-05, + "loss": 1.2732, + "num_input_tokens_seen": 204859644, + "step": 3093 + }, + { + "epoch": 0.28951186408948376, + "loss": 1.2384302616119385, + "loss_ce": 0.014309210702776909, + "loss_iou": 0.51171875, + "loss_num": 0.0400390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 204859644, + "step": 3093 + }, + { + "epoch": 0.2896054663733795, + "grad_norm": 44.99957275390625, + "learning_rate": 5e-05, + "loss": 1.2151, + "num_input_tokens_seen": 204927096, + "step": 3094 + }, + { + "epoch": 0.2896054663733795, + "loss": 1.2121392488479614, + "loss_ce": 0.007549415808171034, + "loss_iou": 0.50390625, + "loss_num": 0.0390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 204927096, + "step": 3094 + }, + { + "epoch": 0.28969906865727524, + "grad_norm": 33.015228271484375, + "learning_rate": 5e-05, + "loss": 1.4204, + "num_input_tokens_seen": 204993976, + "step": 3095 + }, + { + "epoch": 0.28969906865727524, + "loss": 1.4124513864517212, + "loss_ce": 0.006201413460075855, + "loss_iou": 0.57421875, + "loss_num": 0.051513671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 204993976, + "step": 3095 + }, + { + "epoch": 0.28979267094117095, + "grad_norm": 29.203514099121094, + "learning_rate": 5e-05, + "loss": 1.5594, + "num_input_tokens_seen": 205059972, + "step": 3096 + }, + { + "epoch": 0.28979267094117095, + "loss": 1.5423778295516968, + "loss_ce": 0.007221626117825508, + "loss_iou": 0.64453125, + "loss_num": 0.0498046875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 205059972, + "step": 3096 + }, + { + "epoch": 0.2898862732250667, + "grad_norm": 42.84995651245117, + "learning_rate": 5e-05, + "loss": 1.4387, + "num_input_tokens_seen": 205126732, + "step": 3097 + }, + { + "epoch": 0.2898862732250667, + "loss": 1.4812512397766113, + "loss_ce": 0.004688835237175226, + "loss_iou": 0.55859375, + "loss_num": 0.07177734375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 205126732, + "step": 3097 + }, + { + "epoch": 0.2899798755089624, + "grad_norm": 14.987178802490234, + "learning_rate": 5e-05, + "loss": 1.1734, + "num_input_tokens_seen": 205191992, + "step": 3098 + }, + { + "epoch": 0.2899798755089624, + "loss": 1.015680193901062, + "loss_ce": 0.0032289689406752586, + "loss_iou": 0.412109375, + "loss_num": 0.03759765625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 205191992, + "step": 3098 + }, + { + "epoch": 0.29007347779285814, + "grad_norm": 29.289453506469727, + "learning_rate": 5e-05, + "loss": 1.2393, + "num_input_tokens_seen": 205259848, + "step": 3099 + }, + { + "epoch": 0.29007347779285814, + "loss": 1.2359817028045654, + "loss_ce": 0.002094983123242855, + "loss_iou": 0.55859375, + "loss_num": 0.02294921875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 205259848, + "step": 3099 + }, + { + "epoch": 0.29016708007675385, + "grad_norm": 33.743431091308594, + "learning_rate": 5e-05, + "loss": 1.5154, + "num_input_tokens_seen": 205326768, + "step": 3100 + }, + { + "epoch": 0.29016708007675385, + "loss": 1.6340134143829346, + "loss_ce": 0.010966559872031212, + "loss_iou": 0.65234375, + "loss_num": 0.06396484375, + "loss_xval": 1.625, + "num_input_tokens_seen": 205326768, + "step": 3100 + }, + { + "epoch": 0.2902606823606496, + "grad_norm": 27.555036544799805, + "learning_rate": 5e-05, + "loss": 1.3413, + "num_input_tokens_seen": 205392152, + "step": 3101 + }, + { + "epoch": 0.2902606823606496, + "loss": 1.3167400360107422, + "loss_ce": 0.0032635091338306665, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 205392152, + "step": 3101 + }, + { + "epoch": 0.2903542846445453, + "grad_norm": 78.1739273071289, + "learning_rate": 5e-05, + "loss": 1.2446, + "num_input_tokens_seen": 205458220, + "step": 3102 + }, + { + "epoch": 0.2903542846445453, + "loss": 1.2450942993164062, + "loss_ce": 0.004859840031713247, + "loss_iou": 0.53125, + "loss_num": 0.03564453125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 205458220, + "step": 3102 + }, + { + "epoch": 0.29044788692844103, + "grad_norm": 41.430233001708984, + "learning_rate": 5e-05, + "loss": 1.3613, + "num_input_tokens_seen": 205525000, + "step": 3103 + }, + { + "epoch": 0.29044788692844103, + "loss": 1.4686214923858643, + "loss_ce": 0.008111229166388512, + "loss_iou": 0.609375, + "loss_num": 0.048828125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 205525000, + "step": 3103 + }, + { + "epoch": 0.2905414892123368, + "grad_norm": 48.65542984008789, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 205591060, + "step": 3104 + }, + { + "epoch": 0.2905414892123368, + "loss": 1.1052707433700562, + "loss_ce": 0.004196567926555872, + "loss_iou": 0.470703125, + "loss_num": 0.03173828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 205591060, + "step": 3104 + }, + { + "epoch": 0.2906350914962325, + "grad_norm": 23.19552993774414, + "learning_rate": 5e-05, + "loss": 1.4354, + "num_input_tokens_seen": 205657600, + "step": 3105 + }, + { + "epoch": 0.2906350914962325, + "loss": 1.42152738571167, + "loss_ce": 0.008929755538702011, + "loss_iou": 0.609375, + "loss_num": 0.039306640625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 205657600, + "step": 3105 + }, + { + "epoch": 0.2907286937801282, + "grad_norm": 47.86252212524414, + "learning_rate": 5e-05, + "loss": 1.2118, + "num_input_tokens_seen": 205724056, + "step": 3106 + }, + { + "epoch": 0.2907286937801282, + "loss": 1.1531574726104736, + "loss_ce": 0.006673100404441357, + "loss_iou": 0.478515625, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 205724056, + "step": 3106 + }, + { + "epoch": 0.290822296064024, + "grad_norm": 26.101696014404297, + "learning_rate": 5e-05, + "loss": 1.4201, + "num_input_tokens_seen": 205790660, + "step": 3107 + }, + { + "epoch": 0.290822296064024, + "loss": 1.4461697340011597, + "loss_ce": 0.005251740105450153, + "loss_iou": 0.62890625, + "loss_num": 0.036865234375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 205790660, + "step": 3107 + }, + { + "epoch": 0.2909158983479197, + "grad_norm": 31.460783004760742, + "learning_rate": 5e-05, + "loss": 1.2205, + "num_input_tokens_seen": 205857132, + "step": 3108 + }, + { + "epoch": 0.2909158983479197, + "loss": 1.2419980764389038, + "loss_ce": 0.005669943522661924, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 205857132, + "step": 3108 + }, + { + "epoch": 0.2910095006318154, + "grad_norm": 21.015045166015625, + "learning_rate": 5e-05, + "loss": 1.5869, + "num_input_tokens_seen": 205922040, + "step": 3109 + }, + { + "epoch": 0.2910095006318154, + "loss": 1.64469575881958, + "loss_ce": 0.008953599259257317, + "loss_iou": 0.66015625, + "loss_num": 0.0634765625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 205922040, + "step": 3109 + }, + { + "epoch": 0.2911031029157111, + "grad_norm": 17.923660278320312, + "learning_rate": 5e-05, + "loss": 1.4023, + "num_input_tokens_seen": 205988840, + "step": 3110 + }, + { + "epoch": 0.2911031029157111, + "loss": 1.5299301147460938, + "loss_ce": 0.004539435263723135, + "loss_iou": 0.578125, + "loss_num": 0.07421875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 205988840, + "step": 3110 + }, + { + "epoch": 0.2911967051996069, + "grad_norm": 26.57171058654785, + "learning_rate": 5e-05, + "loss": 1.1806, + "num_input_tokens_seen": 206055384, + "step": 3111 + }, + { + "epoch": 0.2911967051996069, + "loss": 1.19579017162323, + "loss_ce": 0.006336994934827089, + "loss_iou": 0.51171875, + "loss_num": 0.033935546875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 206055384, + "step": 3111 + }, + { + "epoch": 0.2912903074835026, + "grad_norm": 18.2117862701416, + "learning_rate": 5e-05, + "loss": 1.2085, + "num_input_tokens_seen": 206121116, + "step": 3112 + }, + { + "epoch": 0.2912903074835026, + "loss": 1.0964746475219727, + "loss_ce": 0.005654362961649895, + "loss_iou": 0.455078125, + "loss_num": 0.0361328125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 206121116, + "step": 3112 + }, + { + "epoch": 0.2913839097673983, + "grad_norm": 11.866911888122559, + "learning_rate": 5e-05, + "loss": 1.1127, + "num_input_tokens_seen": 206188104, + "step": 3113 + }, + { + "epoch": 0.2913839097673983, + "loss": 0.9775887727737427, + "loss_ce": 0.003955947235226631, + "loss_iou": 0.357421875, + "loss_num": 0.05224609375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 206188104, + "step": 3113 + }, + { + "epoch": 0.2914775120512941, + "grad_norm": 24.979978561401367, + "learning_rate": 5e-05, + "loss": 1.4213, + "num_input_tokens_seen": 206253328, + "step": 3114 + }, + { + "epoch": 0.2914775120512941, + "loss": 1.524076223373413, + "loss_ce": 0.004545051604509354, + "loss_iou": 0.640625, + "loss_num": 0.047607421875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 206253328, + "step": 3114 + }, + { + "epoch": 0.2915711143351898, + "grad_norm": 63.859676361083984, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 206319064, + "step": 3115 + }, + { + "epoch": 0.2915711143351898, + "loss": 1.3842346668243408, + "loss_ce": 0.005328364670276642, + "loss_iou": 0.625, + "loss_num": 0.0263671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 206319064, + "step": 3115 + }, + { + "epoch": 0.2916647166190855, + "grad_norm": 25.727230072021484, + "learning_rate": 5e-05, + "loss": 1.65, + "num_input_tokens_seen": 206385376, + "step": 3116 + }, + { + "epoch": 0.2916647166190855, + "loss": 1.6381309032440186, + "loss_ce": 0.0062949820421636105, + "loss_iou": 0.6796875, + "loss_num": 0.054931640625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 206385376, + "step": 3116 + }, + { + "epoch": 0.2917583189029812, + "grad_norm": 29.74369239807129, + "learning_rate": 5e-05, + "loss": 1.2582, + "num_input_tokens_seen": 206452084, + "step": 3117 + }, + { + "epoch": 0.2917583189029812, + "loss": 1.0707981586456299, + "loss_ce": 0.007809832692146301, + "loss_iou": 0.43359375, + "loss_num": 0.0390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 206452084, + "step": 3117 + }, + { + "epoch": 0.291851921186877, + "grad_norm": 41.445213317871094, + "learning_rate": 5e-05, + "loss": 1.6517, + "num_input_tokens_seen": 206519040, + "step": 3118 + }, + { + "epoch": 0.291851921186877, + "loss": 1.6897058486938477, + "loss_ce": 0.007088775746524334, + "loss_iou": 0.67578125, + "loss_num": 0.06591796875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 206519040, + "step": 3118 + }, + { + "epoch": 0.2919455234707727, + "grad_norm": 20.62035369873047, + "learning_rate": 5e-05, + "loss": 1.2956, + "num_input_tokens_seen": 206584924, + "step": 3119 + }, + { + "epoch": 0.2919455234707727, + "loss": 1.4671156406402588, + "loss_ce": 0.0032485707197338343, + "loss_iou": 0.56640625, + "loss_num": 0.0673828125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 206584924, + "step": 3119 + }, + { + "epoch": 0.2920391257546684, + "grad_norm": 28.10143280029297, + "learning_rate": 5e-05, + "loss": 1.4116, + "num_input_tokens_seen": 206651260, + "step": 3120 + }, + { + "epoch": 0.2920391257546684, + "loss": 1.3090413808822632, + "loss_ce": 0.0024007961619645357, + "loss_iou": 0.546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 206651260, + "step": 3120 + }, + { + "epoch": 0.29213272803856416, + "grad_norm": 31.868993759155273, + "learning_rate": 5e-05, + "loss": 1.3152, + "num_input_tokens_seen": 206717704, + "step": 3121 + }, + { + "epoch": 0.29213272803856416, + "loss": 1.4792391061782837, + "loss_ce": 0.0046297162771224976, + "loss_iou": 0.6015625, + "loss_num": 0.05419921875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 206717704, + "step": 3121 + }, + { + "epoch": 0.29222633032245987, + "grad_norm": 51.99101638793945, + "learning_rate": 5e-05, + "loss": 1.7342, + "num_input_tokens_seen": 206784936, + "step": 3122 + }, + { + "epoch": 0.29222633032245987, + "loss": 1.75465726852417, + "loss_ce": 0.00661042146384716, + "loss_iou": 0.6953125, + "loss_num": 0.0712890625, + "loss_xval": 1.75, + "num_input_tokens_seen": 206784936, + "step": 3122 + }, + { + "epoch": 0.2923199326063556, + "grad_norm": 25.420894622802734, + "learning_rate": 5e-05, + "loss": 1.2445, + "num_input_tokens_seen": 206850900, + "step": 3123 + }, + { + "epoch": 0.2923199326063556, + "loss": 1.2743898630142212, + "loss_ce": 0.005346850026398897, + "loss_iou": 0.52734375, + "loss_num": 0.042724609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 206850900, + "step": 3123 + }, + { + "epoch": 0.29241353489025135, + "grad_norm": 28.05361557006836, + "learning_rate": 5e-05, + "loss": 1.1967, + "num_input_tokens_seen": 206916476, + "step": 3124 + }, + { + "epoch": 0.29241353489025135, + "loss": 0.8975913524627686, + "loss_ce": 0.0030601024627685547, + "loss_iou": 0.37890625, + "loss_num": 0.027587890625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 206916476, + "step": 3124 + }, + { + "epoch": 0.29250713717414706, + "grad_norm": 50.064666748046875, + "learning_rate": 5e-05, + "loss": 1.2817, + "num_input_tokens_seen": 206982304, + "step": 3125 + }, + { + "epoch": 0.29250713717414706, + "loss": 1.3855113983154297, + "loss_ce": 0.006605205126106739, + "loss_iou": 0.55859375, + "loss_num": 0.052978515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 206982304, + "step": 3125 + }, + { + "epoch": 0.29260073945804277, + "grad_norm": 25.029882431030273, + "learning_rate": 5e-05, + "loss": 1.5905, + "num_input_tokens_seen": 207049428, + "step": 3126 + }, + { + "epoch": 0.29260073945804277, + "loss": 1.6488122940063477, + "loss_ce": 0.003304490353912115, + "loss_iou": 0.6796875, + "loss_num": 0.057373046875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 207049428, + "step": 3126 + }, + { + "epoch": 0.2926943417419385, + "grad_norm": 17.093364715576172, + "learning_rate": 5e-05, + "loss": 1.0913, + "num_input_tokens_seen": 207115056, + "step": 3127 + }, + { + "epoch": 0.2926943417419385, + "loss": 1.0598585605621338, + "loss_ce": 0.001509041991084814, + "loss_iou": 0.43359375, + "loss_num": 0.03857421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 207115056, + "step": 3127 + }, + { + "epoch": 0.29278794402583425, + "grad_norm": 118.50226593017578, + "learning_rate": 5e-05, + "loss": 1.4114, + "num_input_tokens_seen": 207181452, + "step": 3128 + }, + { + "epoch": 0.29278794402583425, + "loss": 1.3256511688232422, + "loss_ce": 0.008268414065241814, + "loss_iou": 0.53515625, + "loss_num": 0.04931640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 207181452, + "step": 3128 + }, + { + "epoch": 0.29288154630972996, + "grad_norm": 22.6712589263916, + "learning_rate": 5e-05, + "loss": 1.3039, + "num_input_tokens_seen": 207247408, + "step": 3129 + }, + { + "epoch": 0.29288154630972996, + "loss": 1.2579773664474487, + "loss_ce": 0.005047669634222984, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 1.25, + "num_input_tokens_seen": 207247408, + "step": 3129 + }, + { + "epoch": 0.29297514859362567, + "grad_norm": 31.113643646240234, + "learning_rate": 5e-05, + "loss": 1.2598, + "num_input_tokens_seen": 207312496, + "step": 3130 + }, + { + "epoch": 0.29297514859362567, + "loss": 1.1999468803405762, + "loss_ce": 0.004634324926882982, + "loss_iou": 0.515625, + "loss_num": 0.03369140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 207312496, + "step": 3130 + }, + { + "epoch": 0.29306875087752143, + "grad_norm": 22.540021896362305, + "learning_rate": 5e-05, + "loss": 1.474, + "num_input_tokens_seen": 207379652, + "step": 3131 + }, + { + "epoch": 0.29306875087752143, + "loss": 1.3370617628097534, + "loss_ce": 0.004542237147688866, + "loss_iou": 0.53125, + "loss_num": 0.0537109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 207379652, + "step": 3131 + }, + { + "epoch": 0.29316235316141714, + "grad_norm": 20.153026580810547, + "learning_rate": 5e-05, + "loss": 1.2633, + "num_input_tokens_seen": 207445852, + "step": 3132 + }, + { + "epoch": 0.29316235316141714, + "loss": 1.3043044805526733, + "loss_ce": 0.009382598102092743, + "loss_iou": 0.52734375, + "loss_num": 0.048095703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 207445852, + "step": 3132 + }, + { + "epoch": 0.29325595544531285, + "grad_norm": 17.833038330078125, + "learning_rate": 5e-05, + "loss": 1.2594, + "num_input_tokens_seen": 207511536, + "step": 3133 + }, + { + "epoch": 0.29325595544531285, + "loss": 1.2994135618209839, + "loss_ce": 0.003026842838153243, + "loss_iou": 0.46875, + "loss_num": 0.07177734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 207511536, + "step": 3133 + }, + { + "epoch": 0.29334955772920857, + "grad_norm": 25.51861572265625, + "learning_rate": 5e-05, + "loss": 1.1744, + "num_input_tokens_seen": 207579976, + "step": 3134 + }, + { + "epoch": 0.29334955772920857, + "loss": 1.2061455249786377, + "loss_ce": 0.0054619936272501945, + "loss_iou": 0.5234375, + "loss_num": 0.03125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 207579976, + "step": 3134 + }, + { + "epoch": 0.29344316001310433, + "grad_norm": 24.305742263793945, + "learning_rate": 5e-05, + "loss": 1.4683, + "num_input_tokens_seen": 207646368, + "step": 3135 + }, + { + "epoch": 0.29344316001310433, + "loss": 1.6683286428451538, + "loss_ce": 0.0032895775511860847, + "loss_iou": 0.66015625, + "loss_num": 0.0693359375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 207646368, + "step": 3135 + }, + { + "epoch": 0.29353676229700004, + "grad_norm": 29.360382080078125, + "learning_rate": 5e-05, + "loss": 1.3109, + "num_input_tokens_seen": 207711772, + "step": 3136 + }, + { + "epoch": 0.29353676229700004, + "loss": 1.3256011009216309, + "loss_ce": 0.005288603715598583, + "loss_iou": 0.55078125, + "loss_num": 0.0439453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 207711772, + "step": 3136 + }, + { + "epoch": 0.29363036458089575, + "grad_norm": 23.152889251708984, + "learning_rate": 5e-05, + "loss": 1.3689, + "num_input_tokens_seen": 207778708, + "step": 3137 + }, + { + "epoch": 0.29363036458089575, + "loss": 1.370178461074829, + "loss_ce": 0.003479274455457926, + "loss_iou": 0.5625, + "loss_num": 0.048583984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 207778708, + "step": 3137 + }, + { + "epoch": 0.2937239668647915, + "grad_norm": 21.715951919555664, + "learning_rate": 5e-05, + "loss": 1.1348, + "num_input_tokens_seen": 207844300, + "step": 3138 + }, + { + "epoch": 0.2937239668647915, + "loss": 1.3429853916168213, + "loss_ce": 0.005094733089208603, + "loss_iou": 0.5546875, + "loss_num": 0.045654296875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 207844300, + "step": 3138 + }, + { + "epoch": 0.29381756914868723, + "grad_norm": 21.789844512939453, + "learning_rate": 5e-05, + "loss": 1.3594, + "num_input_tokens_seen": 207910844, + "step": 3139 + }, + { + "epoch": 0.29381756914868723, + "loss": 1.5190027952194214, + "loss_ce": 0.007284093182533979, + "loss_iou": 0.6328125, + "loss_num": 0.049560546875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 207910844, + "step": 3139 + }, + { + "epoch": 0.29391117143258294, + "grad_norm": 22.1756534576416, + "learning_rate": 5e-05, + "loss": 1.1716, + "num_input_tokens_seen": 207976996, + "step": 3140 + }, + { + "epoch": 0.29391117143258294, + "loss": 1.0930942296981812, + "loss_ce": 0.003250464564189315, + "loss_iou": 0.46875, + "loss_num": 0.0303955078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 207976996, + "step": 3140 + }, + { + "epoch": 0.2940047737164787, + "grad_norm": 26.880090713500977, + "learning_rate": 5e-05, + "loss": 1.3201, + "num_input_tokens_seen": 208042332, + "step": 3141 + }, + { + "epoch": 0.2940047737164787, + "loss": 1.4008922576904297, + "loss_ce": 0.0053844391368329525, + "loss_iou": 0.5390625, + "loss_num": 0.06396484375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 208042332, + "step": 3141 + }, + { + "epoch": 0.2940983760003744, + "grad_norm": 23.85240936279297, + "learning_rate": 5e-05, + "loss": 1.3306, + "num_input_tokens_seen": 208107356, + "step": 3142 + }, + { + "epoch": 0.2940983760003744, + "loss": 1.4011750221252441, + "loss_ce": 0.0037140091881155968, + "loss_iou": 0.59765625, + "loss_num": 0.04052734375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 208107356, + "step": 3142 + }, + { + "epoch": 0.29419197828427013, + "grad_norm": 31.882375717163086, + "learning_rate": 5e-05, + "loss": 1.4407, + "num_input_tokens_seen": 208174744, + "step": 3143 + }, + { + "epoch": 0.29419197828427013, + "loss": 1.424073576927185, + "loss_ce": 0.0075696613639593124, + "loss_iou": 0.5625, + "loss_num": 0.057861328125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 208174744, + "step": 3143 + }, + { + "epoch": 0.29428558056816584, + "grad_norm": 21.217817306518555, + "learning_rate": 5e-05, + "loss": 1.3887, + "num_input_tokens_seen": 208239860, + "step": 3144 + }, + { + "epoch": 0.29428558056816584, + "loss": 1.3767788410186768, + "loss_ce": 0.0022671385668218136, + "loss_iou": 0.578125, + "loss_num": 0.04345703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 208239860, + "step": 3144 + }, + { + "epoch": 0.2943791828520616, + "grad_norm": 327.39508056640625, + "learning_rate": 5e-05, + "loss": 1.3545, + "num_input_tokens_seen": 208306112, + "step": 3145 + }, + { + "epoch": 0.2943791828520616, + "loss": 1.4406671524047852, + "loss_ce": 0.001214096206240356, + "loss_iou": 0.59765625, + "loss_num": 0.048583984375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 208306112, + "step": 3145 + }, + { + "epoch": 0.2944727851359573, + "grad_norm": 32.474185943603516, + "learning_rate": 5e-05, + "loss": 1.5501, + "num_input_tokens_seen": 208372224, + "step": 3146 + }, + { + "epoch": 0.2944727851359573, + "loss": 1.6570346355438232, + "loss_ce": 0.006643939297646284, + "loss_iou": 0.65234375, + "loss_num": 0.06982421875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 208372224, + "step": 3146 + }, + { + "epoch": 0.294566387419853, + "grad_norm": 34.72795104980469, + "learning_rate": 5e-05, + "loss": 1.3922, + "num_input_tokens_seen": 208439076, + "step": 3147 + }, + { + "epoch": 0.294566387419853, + "loss": 1.3625001907348633, + "loss_ce": 0.004101745784282684, + "loss_iou": 0.59765625, + "loss_num": 0.032470703125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 208439076, + "step": 3147 + }, + { + "epoch": 0.2946599897037488, + "grad_norm": 29.345046997070312, + "learning_rate": 5e-05, + "loss": 1.4084, + "num_input_tokens_seen": 208505016, + "step": 3148 + }, + { + "epoch": 0.2946599897037488, + "loss": 1.1651557683944702, + "loss_ce": 0.004999512806534767, + "loss_iou": 0.474609375, + "loss_num": 0.0419921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 208505016, + "step": 3148 + }, + { + "epoch": 0.2947535919876445, + "grad_norm": 22.391761779785156, + "learning_rate": 5e-05, + "loss": 1.3775, + "num_input_tokens_seen": 208572248, + "step": 3149 + }, + { + "epoch": 0.2947535919876445, + "loss": 1.5035743713378906, + "loss_ce": 0.003086050506681204, + "loss_iou": 0.59765625, + "loss_num": 0.061279296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 208572248, + "step": 3149 + }, + { + "epoch": 0.2948471942715402, + "grad_norm": 11.164349555969238, + "learning_rate": 5e-05, + "loss": 1.2155, + "num_input_tokens_seen": 208639124, + "step": 3150 + }, + { + "epoch": 0.2948471942715402, + "loss": 1.2595999240875244, + "loss_ce": 0.0026419798377901316, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 208639124, + "step": 3150 + }, + { + "epoch": 0.294940796555436, + "grad_norm": 11.988375663757324, + "learning_rate": 5e-05, + "loss": 1.067, + "num_input_tokens_seen": 208705720, + "step": 3151 + }, + { + "epoch": 0.294940796555436, + "loss": 1.0030790567398071, + "loss_ce": 0.004421837627887726, + "loss_iou": 0.369140625, + "loss_num": 0.05224609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 208705720, + "step": 3151 + }, + { + "epoch": 0.2950343988393317, + "grad_norm": 18.118045806884766, + "learning_rate": 5e-05, + "loss": 1.2427, + "num_input_tokens_seen": 208773172, + "step": 3152 + }, + { + "epoch": 0.2950343988393317, + "loss": 1.4260506629943848, + "loss_ce": 0.005152285099029541, + "loss_iou": 0.5625, + "loss_num": 0.05859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 208773172, + "step": 3152 + }, + { + "epoch": 0.2951280011232274, + "grad_norm": 21.028003692626953, + "learning_rate": 5e-05, + "loss": 1.3926, + "num_input_tokens_seen": 208838888, + "step": 3153 + }, + { + "epoch": 0.2951280011232274, + "loss": 1.443208932876587, + "loss_ce": 0.005220574326813221, + "loss_iou": 0.6015625, + "loss_num": 0.047607421875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 208838888, + "step": 3153 + }, + { + "epoch": 0.2952216034071231, + "grad_norm": 38.488521575927734, + "learning_rate": 5e-05, + "loss": 1.4908, + "num_input_tokens_seen": 208905076, + "step": 3154 + }, + { + "epoch": 0.2952216034071231, + "loss": 1.5528995990753174, + "loss_ce": 0.004071405157446861, + "loss_iou": 0.61328125, + "loss_num": 0.06396484375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 208905076, + "step": 3154 + }, + { + "epoch": 0.2953152056910189, + "grad_norm": 30.399873733520508, + "learning_rate": 5e-05, + "loss": 1.2547, + "num_input_tokens_seen": 208971024, + "step": 3155 + }, + { + "epoch": 0.2953152056910189, + "loss": 1.2003772258758545, + "loss_ce": 0.007017870899289846, + "loss_iou": 0.5078125, + "loss_num": 0.036376953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 208971024, + "step": 3155 + }, + { + "epoch": 0.2954088079749146, + "grad_norm": 22.53354835510254, + "learning_rate": 5e-05, + "loss": 1.4043, + "num_input_tokens_seen": 209037196, + "step": 3156 + }, + { + "epoch": 0.2954088079749146, + "loss": 1.3473820686340332, + "loss_ce": 0.0021671669092029333, + "loss_iou": 0.58984375, + "loss_num": 0.033203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 209037196, + "step": 3156 + }, + { + "epoch": 0.2955024102588103, + "grad_norm": 21.677459716796875, + "learning_rate": 5e-05, + "loss": 1.3033, + "num_input_tokens_seen": 209102704, + "step": 3157 + }, + { + "epoch": 0.2955024102588103, + "loss": 1.211876392364502, + "loss_ce": 0.004845119547098875, + "loss_iou": 0.40234375, + "loss_num": 0.08056640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 209102704, + "step": 3157 + }, + { + "epoch": 0.29559601254270607, + "grad_norm": 25.992403030395508, + "learning_rate": 5e-05, + "loss": 1.5325, + "num_input_tokens_seen": 209168692, + "step": 3158 + }, + { + "epoch": 0.29559601254270607, + "loss": 1.4381616115570068, + "loss_ce": 0.003591251326724887, + "loss_iou": 0.55078125, + "loss_num": 0.06640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 209168692, + "step": 3158 + }, + { + "epoch": 0.2956896148266018, + "grad_norm": 35.65980529785156, + "learning_rate": 5e-05, + "loss": 1.3906, + "num_input_tokens_seen": 209234808, + "step": 3159 + }, + { + "epoch": 0.2956896148266018, + "loss": 1.2261383533477783, + "loss_ce": 0.006900131702423096, + "loss_iou": 0.52734375, + "loss_num": 0.032470703125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 209234808, + "step": 3159 + }, + { + "epoch": 0.2957832171104975, + "grad_norm": 25.198034286499023, + "learning_rate": 5e-05, + "loss": 1.6574, + "num_input_tokens_seen": 209301688, + "step": 3160 + }, + { + "epoch": 0.2957832171104975, + "loss": 1.6029188632965088, + "loss_ce": 0.007215700577944517, + "loss_iou": 0.6484375, + "loss_num": 0.059814453125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 209301688, + "step": 3160 + }, + { + "epoch": 0.2958768193943932, + "grad_norm": 18.281625747680664, + "learning_rate": 5e-05, + "loss": 1.5526, + "num_input_tokens_seen": 209367668, + "step": 3161 + }, + { + "epoch": 0.2958768193943932, + "loss": 1.4791182279586792, + "loss_ce": 0.007926858961582184, + "loss_iou": 0.578125, + "loss_num": 0.06396484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 209367668, + "step": 3161 + }, + { + "epoch": 0.29597042167828896, + "grad_norm": 17.44465446472168, + "learning_rate": 5e-05, + "loss": 1.2163, + "num_input_tokens_seen": 209434884, + "step": 3162 + }, + { + "epoch": 0.29597042167828896, + "loss": 1.0572702884674072, + "loss_ce": 0.0040476699359714985, + "loss_iou": 0.427734375, + "loss_num": 0.039306640625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 209434884, + "step": 3162 + }, + { + "epoch": 0.2960640239621847, + "grad_norm": 36.99618148803711, + "learning_rate": 5e-05, + "loss": 1.4565, + "num_input_tokens_seen": 209500584, + "step": 3163 + }, + { + "epoch": 0.2960640239621847, + "loss": 1.5771830081939697, + "loss_ce": 0.0029641787987202406, + "loss_iou": 0.60546875, + "loss_num": 0.0732421875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 209500584, + "step": 3163 + }, + { + "epoch": 0.2961576262460804, + "grad_norm": 19.906404495239258, + "learning_rate": 5e-05, + "loss": 1.4188, + "num_input_tokens_seen": 209566176, + "step": 3164 + }, + { + "epoch": 0.2961576262460804, + "loss": 1.2771902084350586, + "loss_ce": 0.008635531179606915, + "loss_iou": 0.5234375, + "loss_num": 0.04443359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 209566176, + "step": 3164 + }, + { + "epoch": 0.29625122852997615, + "grad_norm": 507.19354248046875, + "learning_rate": 5e-05, + "loss": 1.3143, + "num_input_tokens_seen": 209631572, + "step": 3165 + }, + { + "epoch": 0.29625122852997615, + "loss": 1.2409627437591553, + "loss_ce": 0.007076114881783724, + "loss_iou": 0.51953125, + "loss_num": 0.0390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 209631572, + "step": 3165 + }, + { + "epoch": 0.29634483081387186, + "grad_norm": 58.572933197021484, + "learning_rate": 5e-05, + "loss": 1.4134, + "num_input_tokens_seen": 209697820, + "step": 3166 + }, + { + "epoch": 0.29634483081387186, + "loss": 1.5384180545806885, + "loss_ce": 0.010097688063979149, + "loss_iou": 0.62109375, + "loss_num": 0.0576171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 209697820, + "step": 3166 + }, + { + "epoch": 0.2964384330977676, + "grad_norm": 25.61594581604004, + "learning_rate": 5e-05, + "loss": 1.3639, + "num_input_tokens_seen": 209764664, + "step": 3167 + }, + { + "epoch": 0.2964384330977676, + "loss": 1.5066044330596924, + "loss_ce": 0.007581004872918129, + "loss_iou": 0.625, + "loss_num": 0.049560546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 209764664, + "step": 3167 + }, + { + "epoch": 0.29653203538166334, + "grad_norm": 36.73101806640625, + "learning_rate": 5e-05, + "loss": 1.3057, + "num_input_tokens_seen": 209831260, + "step": 3168 + }, + { + "epoch": 0.29653203538166334, + "loss": 1.3140450716018677, + "loss_ce": 0.01228724978864193, + "loss_iou": 0.5078125, + "loss_num": 0.0576171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 209831260, + "step": 3168 + }, + { + "epoch": 0.29662563766555905, + "grad_norm": 26.18838882446289, + "learning_rate": 5e-05, + "loss": 1.4561, + "num_input_tokens_seen": 209896908, + "step": 3169 + }, + { + "epoch": 0.29662563766555905, + "loss": 1.4335148334503174, + "loss_ce": 0.007367314770817757, + "loss_iou": 0.56640625, + "loss_num": 0.05908203125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 209896908, + "step": 3169 + }, + { + "epoch": 0.29671923994945476, + "grad_norm": 30.429698944091797, + "learning_rate": 5e-05, + "loss": 1.1071, + "num_input_tokens_seen": 209963180, + "step": 3170 + }, + { + "epoch": 0.29671923994945476, + "loss": 1.004789113998413, + "loss_ce": 0.004026209469884634, + "loss_iou": 0.400390625, + "loss_num": 0.0400390625, + "loss_xval": 1.0, + "num_input_tokens_seen": 209963180, + "step": 3170 + }, + { + "epoch": 0.29681284223335047, + "grad_norm": 13.63418960571289, + "learning_rate": 5e-05, + "loss": 1.125, + "num_input_tokens_seen": 210029628, + "step": 3171 + }, + { + "epoch": 0.29681284223335047, + "loss": 0.8942599296569824, + "loss_ce": 0.0036349084693938494, + "loss_iou": 0.369140625, + "loss_num": 0.0302734375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 210029628, + "step": 3171 + }, + { + "epoch": 0.29690644451724624, + "grad_norm": 23.76020622253418, + "learning_rate": 5e-05, + "loss": 1.3733, + "num_input_tokens_seen": 210095444, + "step": 3172 + }, + { + "epoch": 0.29690644451724624, + "loss": 1.2906464338302612, + "loss_ce": 0.002560456981882453, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 210095444, + "step": 3172 + }, + { + "epoch": 0.29700004680114195, + "grad_norm": 19.09914207458496, + "learning_rate": 5e-05, + "loss": 1.2956, + "num_input_tokens_seen": 210162084, + "step": 3173 + }, + { + "epoch": 0.29700004680114195, + "loss": 1.334072470664978, + "loss_ce": 0.004970910027623177, + "loss_iou": 0.53125, + "loss_num": 0.052490234375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 210162084, + "step": 3173 + }, + { + "epoch": 0.29709364908503766, + "grad_norm": 30.996551513671875, + "learning_rate": 5e-05, + "loss": 1.3614, + "num_input_tokens_seen": 210230100, + "step": 3174 + }, + { + "epoch": 0.29709364908503766, + "loss": 1.333590030670166, + "loss_ce": 0.011324452236294746, + "loss_iou": 0.546875, + "loss_num": 0.045166015625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 210230100, + "step": 3174 + }, + { + "epoch": 0.2971872513689334, + "grad_norm": 72.84232330322266, + "learning_rate": 5e-05, + "loss": 1.6517, + "num_input_tokens_seen": 210295896, + "step": 3175 + }, + { + "epoch": 0.2971872513689334, + "loss": 1.7026541233062744, + "loss_ce": 0.0034353560768067837, + "loss_iou": 0.71875, + "loss_num": 0.05322265625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 210295896, + "step": 3175 + }, + { + "epoch": 0.29728085365282914, + "grad_norm": 33.01655197143555, + "learning_rate": 5e-05, + "loss": 1.5121, + "num_input_tokens_seen": 210363516, + "step": 3176 + }, + { + "epoch": 0.29728085365282914, + "loss": 1.541312575340271, + "loss_ce": 0.007132841739803553, + "loss_iou": 0.58984375, + "loss_num": 0.0703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 210363516, + "step": 3176 + }, + { + "epoch": 0.29737445593672485, + "grad_norm": 26.523902893066406, + "learning_rate": 5e-05, + "loss": 1.5613, + "num_input_tokens_seen": 210429380, + "step": 3177 + }, + { + "epoch": 0.29737445593672485, + "loss": 1.5439693927764893, + "loss_ce": 0.003930393140763044, + "loss_iou": 0.66796875, + "loss_num": 0.04052734375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 210429380, + "step": 3177 + }, + { + "epoch": 0.29746805822062056, + "grad_norm": 142.507080078125, + "learning_rate": 5e-05, + "loss": 1.2231, + "num_input_tokens_seen": 210497064, + "step": 3178 + }, + { + "epoch": 0.29746805822062056, + "loss": 1.1566380262374878, + "loss_ce": 0.005759131163358688, + "loss_iou": 0.478515625, + "loss_num": 0.03857421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 210497064, + "step": 3178 + }, + { + "epoch": 0.2975616605045163, + "grad_norm": 26.93979835510254, + "learning_rate": 5e-05, + "loss": 1.3947, + "num_input_tokens_seen": 210563260, + "step": 3179 + }, + { + "epoch": 0.2975616605045163, + "loss": 1.2275421619415283, + "loss_ce": 0.003909416031092405, + "loss_iou": 0.458984375, + "loss_num": 0.061279296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 210563260, + "step": 3179 + }, + { + "epoch": 0.29765526278841203, + "grad_norm": 32.10329055786133, + "learning_rate": 5e-05, + "loss": 1.6622, + "num_input_tokens_seen": 210629748, + "step": 3180 + }, + { + "epoch": 0.29765526278841203, + "loss": 1.7176234722137451, + "loss_ce": 0.0037563140504062176, + "loss_iou": 0.68359375, + "loss_num": 0.06884765625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 210629748, + "step": 3180 + }, + { + "epoch": 0.29774886507230774, + "grad_norm": 22.704957962036133, + "learning_rate": 5e-05, + "loss": 1.426, + "num_input_tokens_seen": 210696296, + "step": 3181 + }, + { + "epoch": 0.29774886507230774, + "loss": 1.5410833358764648, + "loss_ce": 0.005927057005465031, + "loss_iou": 0.58984375, + "loss_num": 0.0712890625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 210696296, + "step": 3181 + }, + { + "epoch": 0.2978424673562035, + "grad_norm": 13.9943208694458, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 210762440, + "step": 3182 + }, + { + "epoch": 0.2978424673562035, + "loss": 1.323799967765808, + "loss_ce": 0.002510875929147005, + "loss_iou": 0.51953125, + "loss_num": 0.056396484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 210762440, + "step": 3182 + }, + { + "epoch": 0.2979360696400992, + "grad_norm": 31.6590633392334, + "learning_rate": 5e-05, + "loss": 1.3181, + "num_input_tokens_seen": 210827516, + "step": 3183 + }, + { + "epoch": 0.2979360696400992, + "loss": 1.3236525058746338, + "loss_ce": 0.007734533865004778, + "loss_iou": 0.5625, + "loss_num": 0.03759765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 210827516, + "step": 3183 + }, + { + "epoch": 0.29802967192399493, + "grad_norm": 79.19822692871094, + "learning_rate": 5e-05, + "loss": 1.429, + "num_input_tokens_seen": 210893532, + "step": 3184 + }, + { + "epoch": 0.29802967192399493, + "loss": 1.205948829650879, + "loss_ce": 0.0072183674201369286, + "loss_iou": 0.47265625, + "loss_num": 0.05126953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 210893532, + "step": 3184 + }, + { + "epoch": 0.2981232742078907, + "grad_norm": 26.083589553833008, + "learning_rate": 5e-05, + "loss": 1.2951, + "num_input_tokens_seen": 210960092, + "step": 3185 + }, + { + "epoch": 0.2981232742078907, + "loss": 1.2965949773788452, + "loss_ce": 0.007379880174994469, + "loss_iou": 0.5703125, + "loss_num": 0.03076171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 210960092, + "step": 3185 + }, + { + "epoch": 0.2982168764917864, + "grad_norm": 36.118099212646484, + "learning_rate": 5e-05, + "loss": 1.3877, + "num_input_tokens_seen": 211027020, + "step": 3186 + }, + { + "epoch": 0.2982168764917864, + "loss": 1.588663101196289, + "loss_ce": 0.006631866097450256, + "loss_iou": 0.59765625, + "loss_num": 0.0771484375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 211027020, + "step": 3186 + }, + { + "epoch": 0.2983104787756821, + "grad_norm": 18.90194320678711, + "learning_rate": 5e-05, + "loss": 1.4396, + "num_input_tokens_seen": 211091992, + "step": 3187 + }, + { + "epoch": 0.2983104787756821, + "loss": 1.517781138420105, + "loss_ce": 0.004109271802008152, + "loss_iou": 0.625, + "loss_num": 0.052001953125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 211091992, + "step": 3187 + }, + { + "epoch": 0.29840408105957783, + "grad_norm": 14.602373123168945, + "learning_rate": 5e-05, + "loss": 1.1811, + "num_input_tokens_seen": 211157576, + "step": 3188 + }, + { + "epoch": 0.29840408105957783, + "loss": 1.405256748199463, + "loss_ce": 0.007307431660592556, + "loss_iou": 0.56640625, + "loss_num": 0.05322265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 211157576, + "step": 3188 + }, + { + "epoch": 0.2984976833434736, + "grad_norm": 17.76519203186035, + "learning_rate": 5e-05, + "loss": 1.2077, + "num_input_tokens_seen": 211223808, + "step": 3189 + }, + { + "epoch": 0.2984976833434736, + "loss": 1.30693781375885, + "loss_ce": 0.0027385344728827477, + "loss_iou": 0.546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 211223808, + "step": 3189 + }, + { + "epoch": 0.2985912856273693, + "grad_norm": 23.347253799438477, + "learning_rate": 5e-05, + "loss": 1.2762, + "num_input_tokens_seen": 211290196, + "step": 3190 + }, + { + "epoch": 0.2985912856273693, + "loss": 1.3785088062286377, + "loss_ce": 0.00399717316031456, + "loss_iou": 0.52734375, + "loss_num": 0.06396484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 211290196, + "step": 3190 + }, + { + "epoch": 0.298684887911265, + "grad_norm": 11.815969467163086, + "learning_rate": 5e-05, + "loss": 1.2287, + "num_input_tokens_seen": 211355016, + "step": 3191 + }, + { + "epoch": 0.298684887911265, + "loss": 1.1194970607757568, + "loss_ce": 0.007100821007043123, + "loss_iou": 0.44140625, + "loss_num": 0.045654296875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 211355016, + "step": 3191 + }, + { + "epoch": 0.2987784901951608, + "grad_norm": 12.142919540405273, + "learning_rate": 5e-05, + "loss": 1.0354, + "num_input_tokens_seen": 211421372, + "step": 3192 + }, + { + "epoch": 0.2987784901951608, + "loss": 1.2642282247543335, + "loss_ce": 0.010321984067559242, + "loss_iou": 0.478515625, + "loss_num": 0.05908203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 211421372, + "step": 3192 + }, + { + "epoch": 0.2988720924790565, + "grad_norm": 31.494945526123047, + "learning_rate": 5e-05, + "loss": 1.2807, + "num_input_tokens_seen": 211488196, + "step": 3193 + }, + { + "epoch": 0.2988720924790565, + "loss": 1.1928824186325073, + "loss_ce": 0.00538242980837822, + "loss_iou": 0.52734375, + "loss_num": 0.0269775390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 211488196, + "step": 3193 + }, + { + "epoch": 0.2989656947629522, + "grad_norm": 23.278404235839844, + "learning_rate": 5e-05, + "loss": 1.3216, + "num_input_tokens_seen": 211554372, + "step": 3194 + }, + { + "epoch": 0.2989656947629522, + "loss": 1.3579224348068237, + "loss_ce": 0.003186051268130541, + "loss_iou": 0.53515625, + "loss_num": 0.056640625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 211554372, + "step": 3194 + }, + { + "epoch": 0.2990592970468479, + "grad_norm": 45.67925262451172, + "learning_rate": 5e-05, + "loss": 1.2529, + "num_input_tokens_seen": 211620852, + "step": 3195 + }, + { + "epoch": 0.2990592970468479, + "loss": 1.220914363861084, + "loss_ce": 0.005582316778600216, + "loss_iou": 0.50390625, + "loss_num": 0.0419921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 211620852, + "step": 3195 + }, + { + "epoch": 0.2991528993307437, + "grad_norm": 29.797014236450195, + "learning_rate": 5e-05, + "loss": 1.2184, + "num_input_tokens_seen": 211686040, + "step": 3196 + }, + { + "epoch": 0.2991528993307437, + "loss": 0.9787318110466003, + "loss_ce": 0.003878271207213402, + "loss_iou": 0.419921875, + "loss_num": 0.027099609375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 211686040, + "step": 3196 + }, + { + "epoch": 0.2992465016146394, + "grad_norm": 27.623170852661133, + "learning_rate": 5e-05, + "loss": 1.4251, + "num_input_tokens_seen": 211751316, + "step": 3197 + }, + { + "epoch": 0.2992465016146394, + "loss": 1.499799370765686, + "loss_ce": 0.006879471242427826, + "loss_iou": 0.609375, + "loss_num": 0.05419921875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 211751316, + "step": 3197 + }, + { + "epoch": 0.2993401038985351, + "grad_norm": 30.4111328125, + "learning_rate": 5e-05, + "loss": 1.2305, + "num_input_tokens_seen": 211817220, + "step": 3198 + }, + { + "epoch": 0.2993401038985351, + "loss": 1.3019930124282837, + "loss_ce": 0.005118002183735371, + "loss_iou": 0.51953125, + "loss_num": 0.05126953125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 211817220, + "step": 3198 + }, + { + "epoch": 0.29943370618243087, + "grad_norm": 26.70878028869629, + "learning_rate": 5e-05, + "loss": 1.2807, + "num_input_tokens_seen": 211883500, + "step": 3199 + }, + { + "epoch": 0.29943370618243087, + "loss": 1.4420101642608643, + "loss_ce": 0.003045236924663186, + "loss_iou": 0.6171875, + "loss_num": 0.041259765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 211883500, + "step": 3199 + }, + { + "epoch": 0.2995273084663266, + "grad_norm": 32.45808792114258, + "learning_rate": 5e-05, + "loss": 1.4446, + "num_input_tokens_seen": 211949328, + "step": 3200 + }, + { + "epoch": 0.2995273084663266, + "loss": 1.4723834991455078, + "loss_ce": 0.002657028380781412, + "loss_iou": 0.59375, + "loss_num": 0.05712890625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 211949328, + "step": 3200 + }, + { + "epoch": 0.2996209107502223, + "grad_norm": 64.23419189453125, + "learning_rate": 5e-05, + "loss": 1.3893, + "num_input_tokens_seen": 212016048, + "step": 3201 + }, + { + "epoch": 0.2996209107502223, + "loss": 1.1797385215759277, + "loss_ce": 0.004933887626975775, + "loss_iou": 0.50390625, + "loss_num": 0.033935546875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 212016048, + "step": 3201 + }, + { + "epoch": 0.29971451303411806, + "grad_norm": 28.914752960205078, + "learning_rate": 5e-05, + "loss": 1.2071, + "num_input_tokens_seen": 212082736, + "step": 3202 + }, + { + "epoch": 0.29971451303411806, + "loss": 1.3583087921142578, + "loss_ce": 0.005281480029225349, + "loss_iou": 0.54296875, + "loss_num": 0.053955078125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 212082736, + "step": 3202 + }, + { + "epoch": 0.29980811531801377, + "grad_norm": 23.79576873779297, + "learning_rate": 5e-05, + "loss": 1.285, + "num_input_tokens_seen": 212148536, + "step": 3203 + }, + { + "epoch": 0.29980811531801377, + "loss": 1.318683385848999, + "loss_ce": 0.008136525750160217, + "loss_iou": 0.53515625, + "loss_num": 0.0478515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 212148536, + "step": 3203 + }, + { + "epoch": 0.2999017176019095, + "grad_norm": 16.102449417114258, + "learning_rate": 5e-05, + "loss": 1.1803, + "num_input_tokens_seen": 212215436, + "step": 3204 + }, + { + "epoch": 0.2999017176019095, + "loss": 1.541203260421753, + "loss_ce": 0.008976730518043041, + "loss_iou": 0.59765625, + "loss_num": 0.0673828125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 212215436, + "step": 3204 + }, + { + "epoch": 0.2999953198858052, + "grad_norm": 15.068942070007324, + "learning_rate": 5e-05, + "loss": 1.4287, + "num_input_tokens_seen": 212282248, + "step": 3205 + }, + { + "epoch": 0.2999953198858052, + "loss": 1.438281536102295, + "loss_ce": 0.00224632048048079, + "loss_iou": 0.546875, + "loss_num": 0.068359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 212282248, + "step": 3205 + }, + { + "epoch": 0.30008892216970096, + "grad_norm": 38.77875518798828, + "learning_rate": 5e-05, + "loss": 1.2581, + "num_input_tokens_seen": 212349404, + "step": 3206 + }, + { + "epoch": 0.30008892216970096, + "loss": 1.3109136819839478, + "loss_ce": 0.006226138211786747, + "loss_iou": 0.5078125, + "loss_num": 0.057373046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 212349404, + "step": 3206 + }, + { + "epoch": 0.30018252445359667, + "grad_norm": 31.315486907958984, + "learning_rate": 5e-05, + "loss": 1.3004, + "num_input_tokens_seen": 212415328, + "step": 3207 + }, + { + "epoch": 0.30018252445359667, + "loss": 1.4697887897491455, + "loss_ce": 0.007386452984064817, + "loss_iou": 0.5859375, + "loss_num": 0.057373046875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 212415328, + "step": 3207 + }, + { + "epoch": 0.3002761267374924, + "grad_norm": 24.298362731933594, + "learning_rate": 5e-05, + "loss": 1.088, + "num_input_tokens_seen": 212480916, + "step": 3208 + }, + { + "epoch": 0.3002761267374924, + "loss": 1.2162508964538574, + "loss_ce": 0.0048251026310026646, + "loss_iou": 0.515625, + "loss_num": 0.035888671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 212480916, + "step": 3208 + }, + { + "epoch": 0.30036972902138814, + "grad_norm": 26.170442581176758, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 212547208, + "step": 3209 + }, + { + "epoch": 0.30036972902138814, + "loss": 1.1825824975967407, + "loss_ce": 0.0043597957119345665, + "loss_iou": 0.52734375, + "loss_num": 0.02490234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 212547208, + "step": 3209 + }, + { + "epoch": 0.30046333130528385, + "grad_norm": 19.814950942993164, + "learning_rate": 5e-05, + "loss": 1.6239, + "num_input_tokens_seen": 212613312, + "step": 3210 + }, + { + "epoch": 0.30046333130528385, + "loss": 1.767000675201416, + "loss_ce": 0.006258486304432154, + "loss_iou": 0.671875, + "loss_num": 0.083984375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 212613312, + "step": 3210 + }, + { + "epoch": 0.30055693358917956, + "grad_norm": 10.765375137329102, + "learning_rate": 5e-05, + "loss": 1.3205, + "num_input_tokens_seen": 212680120, + "step": 3211 + }, + { + "epoch": 0.30055693358917956, + "loss": 1.3583877086639404, + "loss_ce": 0.004383737687021494, + "loss_iou": 0.57421875, + "loss_num": 0.041015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 212680120, + "step": 3211 + }, + { + "epoch": 0.30065053587307533, + "grad_norm": 16.67058753967285, + "learning_rate": 5e-05, + "loss": 1.0711, + "num_input_tokens_seen": 212746068, + "step": 3212 + }, + { + "epoch": 0.30065053587307533, + "loss": 1.119780421257019, + "loss_ce": 0.010405439883470535, + "loss_iou": 0.48828125, + "loss_num": 0.026123046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 212746068, + "step": 3212 + }, + { + "epoch": 0.30074413815697104, + "grad_norm": 26.18053436279297, + "learning_rate": 5e-05, + "loss": 1.3412, + "num_input_tokens_seen": 212812188, + "step": 3213 + }, + { + "epoch": 0.30074413815697104, + "loss": 1.483103632926941, + "loss_ce": 0.0021465462632477283, + "loss_iou": 0.60546875, + "loss_num": 0.0546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 212812188, + "step": 3213 + }, + { + "epoch": 0.30083774044086675, + "grad_norm": 55.32423782348633, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 212878832, + "step": 3214 + }, + { + "epoch": 0.30083774044086675, + "loss": 1.2190561294555664, + "loss_ce": 0.007141960319131613, + "loss_iou": 0.51171875, + "loss_num": 0.037109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 212878832, + "step": 3214 + }, + { + "epoch": 0.30093134272476246, + "grad_norm": 22.433765411376953, + "learning_rate": 5e-05, + "loss": 1.3973, + "num_input_tokens_seen": 212944732, + "step": 3215 + }, + { + "epoch": 0.30093134272476246, + "loss": 1.596411108970642, + "loss_ce": 0.0046141850762069225, + "loss_iou": 0.671875, + "loss_num": 0.05029296875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 212944732, + "step": 3215 + }, + { + "epoch": 0.30102494500865823, + "grad_norm": 23.633569717407227, + "learning_rate": 5e-05, + "loss": 1.1616, + "num_input_tokens_seen": 213011500, + "step": 3216 + }, + { + "epoch": 0.30102494500865823, + "loss": 1.1757826805114746, + "loss_ce": 0.004395921714603901, + "loss_iou": 0.453125, + "loss_num": 0.052734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 213011500, + "step": 3216 + }, + { + "epoch": 0.30111854729255394, + "grad_norm": 39.09695053100586, + "learning_rate": 5e-05, + "loss": 1.5004, + "num_input_tokens_seen": 213077480, + "step": 3217 + }, + { + "epoch": 0.30111854729255394, + "loss": 1.3216581344604492, + "loss_ce": 0.0032987878657877445, + "loss_iou": 0.484375, + "loss_num": 0.0703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 213077480, + "step": 3217 + }, + { + "epoch": 0.30121214957644965, + "grad_norm": 18.81067657470703, + "learning_rate": 5e-05, + "loss": 1.6657, + "num_input_tokens_seen": 213143548, + "step": 3218 + }, + { + "epoch": 0.30121214957644965, + "loss": 1.7820554971694946, + "loss_ce": 0.005688296630978584, + "loss_iou": 0.7578125, + "loss_num": 0.052001953125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 213143548, + "step": 3218 + }, + { + "epoch": 0.3013057518603454, + "grad_norm": 13.473433494567871, + "learning_rate": 5e-05, + "loss": 1.276, + "num_input_tokens_seen": 213209108, + "step": 3219 + }, + { + "epoch": 0.3013057518603454, + "loss": 1.4027621746063232, + "loss_ce": 0.00823102705180645, + "loss_iou": 0.52734375, + "loss_num": 0.068359375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 213209108, + "step": 3219 + }, + { + "epoch": 0.3013993541442411, + "grad_norm": 35.56060791015625, + "learning_rate": 5e-05, + "loss": 1.4559, + "num_input_tokens_seen": 213274852, + "step": 3220 + }, + { + "epoch": 0.3013993541442411, + "loss": 1.5947089195251465, + "loss_ce": 0.005353348329663277, + "loss_iou": 0.6171875, + "loss_num": 0.0703125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 213274852, + "step": 3220 + }, + { + "epoch": 0.30149295642813684, + "grad_norm": 22.169809341430664, + "learning_rate": 5e-05, + "loss": 1.4235, + "num_input_tokens_seen": 213341776, + "step": 3221 + }, + { + "epoch": 0.30149295642813684, + "loss": 1.308863878250122, + "loss_ce": 0.004176258575171232, + "loss_iou": 0.546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 213341776, + "step": 3221 + }, + { + "epoch": 0.30158655871203255, + "grad_norm": 25.523279190063477, + "learning_rate": 5e-05, + "loss": 1.1305, + "num_input_tokens_seen": 213407452, + "step": 3222 + }, + { + "epoch": 0.30158655871203255, + "loss": 1.0799659490585327, + "loss_ce": 0.006723684258759022, + "loss_iou": 0.4140625, + "loss_num": 0.04931640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 213407452, + "step": 3222 + }, + { + "epoch": 0.3016801609959283, + "grad_norm": 20.90241241455078, + "learning_rate": 5e-05, + "loss": 1.561, + "num_input_tokens_seen": 213474412, + "step": 3223 + }, + { + "epoch": 0.3016801609959283, + "loss": 1.497004747390747, + "loss_ce": 0.006770439445972443, + "loss_iou": 0.59765625, + "loss_num": 0.05810546875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 213474412, + "step": 3223 + }, + { + "epoch": 0.301773763279824, + "grad_norm": 31.53831672668457, + "learning_rate": 5e-05, + "loss": 1.4469, + "num_input_tokens_seen": 213540040, + "step": 3224 + }, + { + "epoch": 0.301773763279824, + "loss": 1.5138611793518066, + "loss_ce": 0.004583851899951696, + "loss_iou": 0.6328125, + "loss_num": 0.049072265625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 213540040, + "step": 3224 + }, + { + "epoch": 0.30186736556371974, + "grad_norm": 54.882659912109375, + "learning_rate": 5e-05, + "loss": 1.5197, + "num_input_tokens_seen": 213606588, + "step": 3225 + }, + { + "epoch": 0.30186736556371974, + "loss": 1.3511512279510498, + "loss_ce": 0.004959766753017902, + "loss_iou": 0.578125, + "loss_num": 0.03857421875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 213606588, + "step": 3225 + }, + { + "epoch": 0.3019609678476155, + "grad_norm": 28.352731704711914, + "learning_rate": 5e-05, + "loss": 1.4563, + "num_input_tokens_seen": 213673088, + "step": 3226 + }, + { + "epoch": 0.3019609678476155, + "loss": 1.3883702754974365, + "loss_ce": 0.006534261628985405, + "loss_iou": 0.5703125, + "loss_num": 0.048095703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 213673088, + "step": 3226 + }, + { + "epoch": 0.3020545701315112, + "grad_norm": 24.50950050354004, + "learning_rate": 5e-05, + "loss": 1.4034, + "num_input_tokens_seen": 213738492, + "step": 3227 + }, + { + "epoch": 0.3020545701315112, + "loss": 1.476147174835205, + "loss_ce": 0.010326847434043884, + "loss_iou": 0.60546875, + "loss_num": 0.05029296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 213738492, + "step": 3227 + }, + { + "epoch": 0.3021481724154069, + "grad_norm": 18.68236541748047, + "learning_rate": 5e-05, + "loss": 1.3179, + "num_input_tokens_seen": 213805016, + "step": 3228 + }, + { + "epoch": 0.3021481724154069, + "loss": 1.3263987302780151, + "loss_ce": 0.008039271458983421, + "loss_iou": 0.51953125, + "loss_num": 0.055419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 213805016, + "step": 3228 + }, + { + "epoch": 0.3022417746993027, + "grad_norm": 25.224550247192383, + "learning_rate": 5e-05, + "loss": 1.4273, + "num_input_tokens_seen": 213871088, + "step": 3229 + }, + { + "epoch": 0.3022417746993027, + "loss": 1.4278255701065063, + "loss_ce": 0.004485721699893475, + "loss_iou": 0.53515625, + "loss_num": 0.0703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 213871088, + "step": 3229 + }, + { + "epoch": 0.3023353769831984, + "grad_norm": 35.3027458190918, + "learning_rate": 5e-05, + "loss": 1.3905, + "num_input_tokens_seen": 213937156, + "step": 3230 + }, + { + "epoch": 0.3023353769831984, + "loss": 1.3928260803222656, + "loss_ce": 0.005130731966346502, + "loss_iou": 0.6015625, + "loss_num": 0.037841796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 213937156, + "step": 3230 + }, + { + "epoch": 0.3024289792670941, + "grad_norm": 33.67833709716797, + "learning_rate": 5e-05, + "loss": 1.59, + "num_input_tokens_seen": 214003308, + "step": 3231 + }, + { + "epoch": 0.3024289792670941, + "loss": 1.752487301826477, + "loss_ce": 0.005417039152234793, + "loss_iou": 0.69140625, + "loss_num": 0.0732421875, + "loss_xval": 1.75, + "num_input_tokens_seen": 214003308, + "step": 3231 + }, + { + "epoch": 0.3025225815509898, + "grad_norm": 20.47861671447754, + "learning_rate": 5e-05, + "loss": 1.4846, + "num_input_tokens_seen": 214068572, + "step": 3232 + }, + { + "epoch": 0.3025225815509898, + "loss": 1.7175261974334717, + "loss_ce": 0.007076878100633621, + "loss_iou": 0.6875, + "loss_num": 0.06640625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 214068572, + "step": 3232 + }, + { + "epoch": 0.3026161838348856, + "grad_norm": 13.899136543273926, + "learning_rate": 5e-05, + "loss": 1.1464, + "num_input_tokens_seen": 214134372, + "step": 3233 + }, + { + "epoch": 0.3026161838348856, + "loss": 1.2238168716430664, + "loss_ce": 0.005974735599011183, + "loss_iou": 0.474609375, + "loss_num": 0.053466796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 214134372, + "step": 3233 + }, + { + "epoch": 0.3027097861187813, + "grad_norm": 11.780744552612305, + "learning_rate": 5e-05, + "loss": 0.9685, + "num_input_tokens_seen": 214200328, + "step": 3234 + }, + { + "epoch": 0.3027097861187813, + "loss": 0.8649457693099976, + "loss_ce": 0.004105925559997559, + "loss_iou": 0.34375, + "loss_num": 0.034912109375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 214200328, + "step": 3234 + }, + { + "epoch": 0.302803388402677, + "grad_norm": 20.04199981689453, + "learning_rate": 5e-05, + "loss": 1.3809, + "num_input_tokens_seen": 214266488, + "step": 3235 + }, + { + "epoch": 0.302803388402677, + "loss": 1.6200001239776611, + "loss_ce": 0.007695465348660946, + "loss_iou": 0.65625, + "loss_num": 0.059326171875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 214266488, + "step": 3235 + }, + { + "epoch": 0.3028969906865728, + "grad_norm": 31.310379028320312, + "learning_rate": 5e-05, + "loss": 1.5926, + "num_input_tokens_seen": 214333240, + "step": 3236 + }, + { + "epoch": 0.3028969906865728, + "loss": 1.5808790922164917, + "loss_ce": 0.0047071995213627815, + "loss_iou": 0.65234375, + "loss_num": 0.05517578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 214333240, + "step": 3236 + }, + { + "epoch": 0.3029905929704685, + "grad_norm": 45.15845489501953, + "learning_rate": 5e-05, + "loss": 1.3997, + "num_input_tokens_seen": 214400204, + "step": 3237 + }, + { + "epoch": 0.3029905929704685, + "loss": 1.3295578956604004, + "loss_ce": 0.0048508960753679276, + "loss_iou": 0.546875, + "loss_num": 0.047119140625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 214400204, + "step": 3237 + }, + { + "epoch": 0.3030841952543642, + "grad_norm": 34.50333786010742, + "learning_rate": 5e-05, + "loss": 1.646, + "num_input_tokens_seen": 214466076, + "step": 3238 + }, + { + "epoch": 0.3030841952543642, + "loss": 1.7696900367736816, + "loss_ce": 0.008459556847810745, + "loss_iou": 0.625, + "loss_num": 0.1025390625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 214466076, + "step": 3238 + }, + { + "epoch": 0.3031777975382599, + "grad_norm": 24.536317825317383, + "learning_rate": 5e-05, + "loss": 1.2985, + "num_input_tokens_seen": 214531960, + "step": 3239 + }, + { + "epoch": 0.3031777975382599, + "loss": 1.2813217639923096, + "loss_ce": 0.0045273685827851295, + "loss_iou": 0.482421875, + "loss_num": 0.062255859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 214531960, + "step": 3239 + }, + { + "epoch": 0.3032713998221557, + "grad_norm": 67.76273345947266, + "learning_rate": 5e-05, + "loss": 1.2146, + "num_input_tokens_seen": 214597236, + "step": 3240 + }, + { + "epoch": 0.3032713998221557, + "loss": 1.2255988121032715, + "loss_ce": 0.005872255191206932, + "loss_iou": 0.494140625, + "loss_num": 0.0458984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 214597236, + "step": 3240 + }, + { + "epoch": 0.3033650021060514, + "grad_norm": 16.857189178466797, + "learning_rate": 5e-05, + "loss": 1.2838, + "num_input_tokens_seen": 214662984, + "step": 3241 + }, + { + "epoch": 0.3033650021060514, + "loss": 1.1721733808517456, + "loss_ce": 0.009758800268173218, + "loss_iou": 0.455078125, + "loss_num": 0.05029296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 214662984, + "step": 3241 + }, + { + "epoch": 0.3034586043899471, + "grad_norm": 17.222375869750977, + "learning_rate": 5e-05, + "loss": 1.4831, + "num_input_tokens_seen": 214729136, + "step": 3242 + }, + { + "epoch": 0.3034586043899471, + "loss": 1.3036366701126099, + "loss_ce": 0.004320286214351654, + "loss_iou": 0.5390625, + "loss_num": 0.043701171875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 214729136, + "step": 3242 + }, + { + "epoch": 0.30355220667384286, + "grad_norm": 25.268510818481445, + "learning_rate": 5e-05, + "loss": 1.2756, + "num_input_tokens_seen": 214797088, + "step": 3243 + }, + { + "epoch": 0.30355220667384286, + "loss": 1.2916339635849, + "loss_ce": 0.006966054905205965, + "loss_iou": 0.52734375, + "loss_num": 0.045654296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 214797088, + "step": 3243 + }, + { + "epoch": 0.30364580895773857, + "grad_norm": 19.913009643554688, + "learning_rate": 5e-05, + "loss": 1.2999, + "num_input_tokens_seen": 214862864, + "step": 3244 + }, + { + "epoch": 0.30364580895773857, + "loss": 1.1519443988800049, + "loss_ce": 0.004971779882907867, + "loss_iou": 0.478515625, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 214862864, + "step": 3244 + }, + { + "epoch": 0.3037394112416343, + "grad_norm": 28.684814453125, + "learning_rate": 5e-05, + "loss": 1.3074, + "num_input_tokens_seen": 214929032, + "step": 3245 + }, + { + "epoch": 0.3037394112416343, + "loss": 1.4903351068496704, + "loss_ce": 0.00614324351772666, + "loss_iou": 0.60546875, + "loss_num": 0.053955078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 214929032, + "step": 3245 + }, + { + "epoch": 0.30383301352553005, + "grad_norm": 25.049110412597656, + "learning_rate": 5e-05, + "loss": 1.5934, + "num_input_tokens_seen": 214995796, + "step": 3246 + }, + { + "epoch": 0.30383301352553005, + "loss": 1.5179600715637207, + "loss_ce": 0.002335167722776532, + "loss_iou": 0.609375, + "loss_num": 0.05908203125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 214995796, + "step": 3246 + }, + { + "epoch": 0.30392661580942576, + "grad_norm": 23.012149810791016, + "learning_rate": 5e-05, + "loss": 1.2905, + "num_input_tokens_seen": 215061024, + "step": 3247 + }, + { + "epoch": 0.30392661580942576, + "loss": 1.074303388595581, + "loss_ce": 0.0028923354111611843, + "loss_iou": 0.478515625, + "loss_num": 0.0230712890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 215061024, + "step": 3247 + }, + { + "epoch": 0.30402021809332147, + "grad_norm": 27.734107971191406, + "learning_rate": 5e-05, + "loss": 1.4956, + "num_input_tokens_seen": 215126792, + "step": 3248 + }, + { + "epoch": 0.30402021809332147, + "loss": 1.4625927209854126, + "loss_ce": 0.006049771327525377, + "loss_iou": 0.56640625, + "loss_num": 0.064453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 215126792, + "step": 3248 + }, + { + "epoch": 0.3041138203772172, + "grad_norm": 31.783884048461914, + "learning_rate": 5e-05, + "loss": 1.3002, + "num_input_tokens_seen": 215192640, + "step": 3249 + }, + { + "epoch": 0.3041138203772172, + "loss": 1.352917194366455, + "loss_ce": 0.010143849067389965, + "loss_iou": 0.5703125, + "loss_num": 0.0400390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 215192640, + "step": 3249 + }, + { + "epoch": 0.30420742266111295, + "grad_norm": 29.57720375061035, + "learning_rate": 5e-05, + "loss": 1.5682, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "eval_seeclick_CIoU": 0.1455652415752411, + "eval_seeclick_GIoU": 0.1675873100757599, + "eval_seeclick_IoU": 0.28123968839645386, + "eval_seeclick_MAE_all": 0.1613713800907135, + "eval_seeclick_MAE_h": 0.0649598129093647, + "eval_seeclick_MAE_w": 0.11758418381214142, + "eval_seeclick_MAE_x_boxes": 0.25382599234580994, + "eval_seeclick_MAE_y_boxes": 0.1627851352095604, + "eval_seeclick_NUM_probability": 0.9999281167984009, + "eval_seeclick_inside_bbox": 0.4229166805744171, + "eval_seeclick_loss": 2.525183916091919, + "eval_seeclick_loss_ce": 0.013766091782599688, + "eval_seeclick_loss_iou": 0.8785400390625, + "eval_seeclick_loss_num": 0.16729736328125, + "eval_seeclick_loss_xval": 2.59326171875, + "eval_seeclick_runtime": 64.7086, + "eval_seeclick_samples_per_second": 0.726, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "eval_icons_CIoU": -0.10694961249828339, + "eval_icons_GIoU": 0.029194827191531658, + "eval_icons_IoU": 0.09008727595210075, + "eval_icons_MAE_all": 0.18352647870779037, + "eval_icons_MAE_h": 0.12440169230103493, + "eval_icons_MAE_w": 0.1849340870976448, + "eval_icons_MAE_x_boxes": 0.17155595868825912, + "eval_icons_MAE_y_boxes": 0.08004642091691494, + "eval_icons_NUM_probability": 0.9999755620956421, + "eval_icons_inside_bbox": 0.1493055559694767, + "eval_icons_loss": 2.8794074058532715, + "eval_icons_loss_ce": 3.1658116313337814e-05, + "eval_icons_loss_iou": 0.975830078125, + "eval_icons_loss_num": 0.200042724609375, + "eval_icons_loss_xval": 2.9501953125, + "eval_icons_runtime": 67.4453, + "eval_icons_samples_per_second": 0.741, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "eval_screenspot_CIoU": -0.025577165186405182, + "eval_screenspot_GIoU": 0.006226751953363419, + "eval_screenspot_IoU": 0.15358526011308035, + "eval_screenspot_MAE_all": 0.22168662150700888, + "eval_screenspot_MAE_h": 0.14507206281026205, + "eval_screenspot_MAE_w": 0.17006426552931467, + "eval_screenspot_MAE_x_boxes": 0.34340771039326984, + "eval_screenspot_MAE_y_boxes": 0.14694421738386154, + "eval_screenspot_NUM_probability": 0.9999379515647888, + "eval_screenspot_inside_bbox": 0.2941666642824809, + "eval_screenspot_loss": 3.1456081867218018, + "eval_screenspot_loss_ce": 0.005427776525417964, + "eval_screenspot_loss_iou": 1.0172526041666667, + "eval_screenspot_loss_num": 0.2325439453125, + "eval_screenspot_loss_xval": 3.1966145833333335, + "eval_screenspot_runtime": 117.4274, + "eval_screenspot_samples_per_second": 0.758, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "eval_compot_CIoU": -0.01815701834857464, + "eval_compot_GIoU": 0.0553207378834486, + "eval_compot_IoU": 0.14277129620313644, + "eval_compot_MAE_all": 0.1962691694498062, + "eval_compot_MAE_h": 0.1110198013484478, + "eval_compot_MAE_w": 0.16523808985948563, + "eval_compot_MAE_x_boxes": 0.2056160643696785, + "eval_compot_MAE_y_boxes": 0.14012055471539497, + "eval_compot_NUM_probability": 0.9998982548713684, + "eval_compot_inside_bbox": 0.1736111119389534, + "eval_compot_loss": 2.8881747722625732, + "eval_compot_loss_ce": 0.001730227144435048, + "eval_compot_loss_iou": 0.95166015625, + "eval_compot_loss_num": 0.1961212158203125, + "eval_compot_loss_xval": 2.8837890625, + "eval_compot_runtime": 68.3336, + "eval_compot_samples_per_second": 0.732, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "eval_custom_ui_MAE_all": 0.17363159358501434, + "eval_custom_ui_MAE_x": 0.15588220953941345, + "eval_custom_ui_MAE_y": 0.19138099253177643, + "eval_custom_ui_NUM_probability": 0.9998042583465576, + "eval_custom_ui_loss": 0.9675492644309998, + "eval_custom_ui_loss_ce": 0.1515451893210411, + "eval_custom_ui_loss_num": 0.17529296875, + "eval_custom_ui_loss_xval": 0.87548828125, + "eval_custom_ui_runtime": 62.3142, + "eval_custom_ui_samples_per_second": 0.802, + "eval_custom_ui_steps_per_second": 0.032, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30420742266111295, + "loss": 1.0495203733444214, + "loss_ce": 0.175008624792099, + "loss_iou": 0.0, + "loss_num": 0.1748046875, + "loss_xval": 0.875, + "num_input_tokens_seen": 215259140, + "step": 3250 + }, + { + "epoch": 0.30430102494500866, + "grad_norm": 16.616992950439453, + "learning_rate": 5e-05, + "loss": 1.3986, + "num_input_tokens_seen": 215325076, + "step": 3251 + }, + { + "epoch": 0.30430102494500866, + "loss": 1.1711382865905762, + "loss_ce": 0.004146065097302198, + "loss_iou": 0.490234375, + "loss_num": 0.037841796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 215325076, + "step": 3251 + }, + { + "epoch": 0.30439462722890437, + "grad_norm": 24.00971221923828, + "learning_rate": 5e-05, + "loss": 1.1938, + "num_input_tokens_seen": 215391292, + "step": 3252 + }, + { + "epoch": 0.30439462722890437, + "loss": 1.2654268741607666, + "loss_ce": 0.00614947360008955, + "loss_iou": 0.5390625, + "loss_num": 0.036376953125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 215391292, + "step": 3252 + }, + { + "epoch": 0.30448822951280013, + "grad_norm": 27.957509994506836, + "learning_rate": 5e-05, + "loss": 1.4233, + "num_input_tokens_seen": 215455908, + "step": 3253 + }, + { + "epoch": 0.30448822951280013, + "loss": 1.4892008304595947, + "loss_ce": 0.009708629921078682, + "loss_iou": 0.51953125, + "loss_num": 0.0888671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 215455908, + "step": 3253 + }, + { + "epoch": 0.30458183179669585, + "grad_norm": 129.54901123046875, + "learning_rate": 5e-05, + "loss": 1.4067, + "num_input_tokens_seen": 215522272, + "step": 3254 + }, + { + "epoch": 0.30458183179669585, + "loss": 1.354069471359253, + "loss_ce": 0.009343021549284458, + "loss_iou": 0.58984375, + "loss_num": 0.032958984375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 215522272, + "step": 3254 + }, + { + "epoch": 0.30467543408059156, + "grad_norm": 26.872020721435547, + "learning_rate": 5e-05, + "loss": 1.3032, + "num_input_tokens_seen": 215589168, + "step": 3255 + }, + { + "epoch": 0.30467543408059156, + "loss": 1.2833125591278076, + "loss_ce": 0.00767773762345314, + "loss_iou": 0.5, + "loss_num": 0.054443359375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 215589168, + "step": 3255 + }, + { + "epoch": 0.30476903636448727, + "grad_norm": 24.30497932434082, + "learning_rate": 5e-05, + "loss": 1.3866, + "num_input_tokens_seen": 215656732, + "step": 3256 + }, + { + "epoch": 0.30476903636448727, + "loss": 1.5192744731903076, + "loss_ce": 0.00316119147464633, + "loss_iou": 0.62890625, + "loss_num": 0.051025390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 215656732, + "step": 3256 + }, + { + "epoch": 0.30486263864838303, + "grad_norm": 29.15682029724121, + "learning_rate": 5e-05, + "loss": 1.4388, + "num_input_tokens_seen": 215721492, + "step": 3257 + }, + { + "epoch": 0.30486263864838303, + "loss": 1.6248369216918945, + "loss_ce": 0.006184583995491266, + "loss_iou": 0.6796875, + "loss_num": 0.052978515625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 215721492, + "step": 3257 + }, + { + "epoch": 0.30495624093227874, + "grad_norm": 34.21500778198242, + "learning_rate": 5e-05, + "loss": 1.2608, + "num_input_tokens_seen": 215787724, + "step": 3258 + }, + { + "epoch": 0.30495624093227874, + "loss": 1.2550270557403564, + "loss_ce": 0.003562240395694971, + "loss_iou": 0.55078125, + "loss_num": 0.0299072265625, + "loss_xval": 1.25, + "num_input_tokens_seen": 215787724, + "step": 3258 + }, + { + "epoch": 0.30504984321617445, + "grad_norm": 46.901206970214844, + "learning_rate": 5e-05, + "loss": 1.4256, + "num_input_tokens_seen": 215853812, + "step": 3259 + }, + { + "epoch": 0.30504984321617445, + "loss": 1.5135836601257324, + "loss_ce": 0.011630570515990257, + "loss_iou": 0.56640625, + "loss_num": 0.0732421875, + "loss_xval": 1.5, + "num_input_tokens_seen": 215853812, + "step": 3259 + }, + { + "epoch": 0.3051434455000702, + "grad_norm": 23.632041931152344, + "learning_rate": 5e-05, + "loss": 1.6057, + "num_input_tokens_seen": 215920840, + "step": 3260 + }, + { + "epoch": 0.3051434455000702, + "loss": 1.6945726871490479, + "loss_ce": 0.004143000580370426, + "loss_iou": 0.6875, + "loss_num": 0.062255859375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 215920840, + "step": 3260 + }, + { + "epoch": 0.30523704778396593, + "grad_norm": 24.994848251342773, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 215987696, + "step": 3261 + }, + { + "epoch": 0.30523704778396593, + "loss": 1.244982123374939, + "loss_ce": 0.0027946701738983393, + "loss_iou": 0.5078125, + "loss_num": 0.04541015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 215987696, + "step": 3261 + }, + { + "epoch": 0.30533065006786164, + "grad_norm": 21.05302619934082, + "learning_rate": 5e-05, + "loss": 1.2648, + "num_input_tokens_seen": 216053572, + "step": 3262 + }, + { + "epoch": 0.30533065006786164, + "loss": 0.926849365234375, + "loss_ce": 0.005950903054326773, + "loss_iou": 0.384765625, + "loss_num": 0.0301513671875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 216053572, + "step": 3262 + }, + { + "epoch": 0.3054242523517574, + "grad_norm": 28.137279510498047, + "learning_rate": 5e-05, + "loss": 1.4048, + "num_input_tokens_seen": 216119900, + "step": 3263 + }, + { + "epoch": 0.3054242523517574, + "loss": 1.5935150384902954, + "loss_ce": 0.003671366721391678, + "loss_iou": 0.61328125, + "loss_num": 0.0732421875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 216119900, + "step": 3263 + }, + { + "epoch": 0.3055178546356531, + "grad_norm": 103.77515411376953, + "learning_rate": 5e-05, + "loss": 1.5608, + "num_input_tokens_seen": 216185992, + "step": 3264 + }, + { + "epoch": 0.3055178546356531, + "loss": 1.6350138187408447, + "loss_ce": 0.005130899604409933, + "loss_iou": 0.7109375, + "loss_num": 0.04248046875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 216185992, + "step": 3264 + }, + { + "epoch": 0.30561145691954883, + "grad_norm": 41.907386779785156, + "learning_rate": 5e-05, + "loss": 1.1478, + "num_input_tokens_seen": 216251256, + "step": 3265 + }, + { + "epoch": 0.30561145691954883, + "loss": 1.234153389930725, + "loss_ce": 0.0034404967445880175, + "loss_iou": 0.4765625, + "loss_num": 0.0556640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 216251256, + "step": 3265 + }, + { + "epoch": 0.30570505920344454, + "grad_norm": 21.314462661743164, + "learning_rate": 5e-05, + "loss": 1.3408, + "num_input_tokens_seen": 216318780, + "step": 3266 + }, + { + "epoch": 0.30570505920344454, + "loss": 1.218451976776123, + "loss_ce": 0.006049755960702896, + "loss_iou": 0.5234375, + "loss_num": 0.0322265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 216318780, + "step": 3266 + }, + { + "epoch": 0.3057986614873403, + "grad_norm": 43.160789489746094, + "learning_rate": 5e-05, + "loss": 1.3229, + "num_input_tokens_seen": 216385228, + "step": 3267 + }, + { + "epoch": 0.3057986614873403, + "loss": 1.3583934307098389, + "loss_ce": 0.005854426883161068, + "loss_iou": 0.5859375, + "loss_num": 0.036376953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 216385228, + "step": 3267 + }, + { + "epoch": 0.305892263771236, + "grad_norm": 26.396848678588867, + "learning_rate": 5e-05, + "loss": 1.2981, + "num_input_tokens_seen": 216450812, + "step": 3268 + }, + { + "epoch": 0.305892263771236, + "loss": 1.1707066297531128, + "loss_ce": 0.004446827806532383, + "loss_iou": 0.44140625, + "loss_num": 0.056884765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 216450812, + "step": 3268 + }, + { + "epoch": 0.30598586605513173, + "grad_norm": 15.987907409667969, + "learning_rate": 5e-05, + "loss": 1.2681, + "num_input_tokens_seen": 216516500, + "step": 3269 + }, + { + "epoch": 0.30598586605513173, + "loss": 1.4139719009399414, + "loss_ce": 0.005768820643424988, + "loss_iou": 0.5625, + "loss_num": 0.056396484375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 216516500, + "step": 3269 + }, + { + "epoch": 0.3060794683390275, + "grad_norm": 14.837997436523438, + "learning_rate": 5e-05, + "loss": 1.2282, + "num_input_tokens_seen": 216583640, + "step": 3270 + }, + { + "epoch": 0.3060794683390275, + "loss": 1.2314945459365845, + "loss_ce": 0.00493203941732645, + "loss_iou": 0.48828125, + "loss_num": 0.050048828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 216583640, + "step": 3270 + }, + { + "epoch": 0.3061730706229232, + "grad_norm": 44.174407958984375, + "learning_rate": 5e-05, + "loss": 1.2446, + "num_input_tokens_seen": 216649352, + "step": 3271 + }, + { + "epoch": 0.3061730706229232, + "loss": 1.2967551946640015, + "loss_ce": 0.004763028584420681, + "loss_iou": 0.51953125, + "loss_num": 0.050537109375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 216649352, + "step": 3271 + }, + { + "epoch": 0.3062666729068189, + "grad_norm": 20.741392135620117, + "learning_rate": 5e-05, + "loss": 1.4702, + "num_input_tokens_seen": 216717100, + "step": 3272 + }, + { + "epoch": 0.3062666729068189, + "loss": 1.4455691576004028, + "loss_ce": 0.004651239141821861, + "loss_iou": 0.5703125, + "loss_num": 0.06103515625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 216717100, + "step": 3272 + }, + { + "epoch": 0.3063602751907146, + "grad_norm": 24.962942123413086, + "learning_rate": 5e-05, + "loss": 1.292, + "num_input_tokens_seen": 216783644, + "step": 3273 + }, + { + "epoch": 0.3063602751907146, + "loss": 1.1587369441986084, + "loss_ce": 0.00419585220515728, + "loss_iou": 0.453125, + "loss_num": 0.050048828125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 216783644, + "step": 3273 + }, + { + "epoch": 0.3064538774746104, + "grad_norm": 31.80834197998047, + "learning_rate": 5e-05, + "loss": 1.4634, + "num_input_tokens_seen": 216850756, + "step": 3274 + }, + { + "epoch": 0.3064538774746104, + "loss": 1.445806622505188, + "loss_ce": 0.0034238863736391068, + "loss_iou": 0.609375, + "loss_num": 0.04443359375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 216850756, + "step": 3274 + }, + { + "epoch": 0.3065474797585061, + "grad_norm": 41.55213928222656, + "learning_rate": 5e-05, + "loss": 1.4646, + "num_input_tokens_seen": 216917756, + "step": 3275 + }, + { + "epoch": 0.3065474797585061, + "loss": 1.6461317539215088, + "loss_ce": 0.008436314761638641, + "loss_iou": 0.69921875, + "loss_num": 0.048095703125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 216917756, + "step": 3275 + }, + { + "epoch": 0.3066410820424018, + "grad_norm": 46.32811737060547, + "learning_rate": 5e-05, + "loss": 1.3779, + "num_input_tokens_seen": 216984336, + "step": 3276 + }, + { + "epoch": 0.3066410820424018, + "loss": 1.4777175188064575, + "loss_ce": 0.004572918172925711, + "loss_iou": 0.6328125, + "loss_num": 0.041259765625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 216984336, + "step": 3276 + }, + { + "epoch": 0.3067346843262976, + "grad_norm": 26.338844299316406, + "learning_rate": 5e-05, + "loss": 1.2143, + "num_input_tokens_seen": 217050360, + "step": 3277 + }, + { + "epoch": 0.3067346843262976, + "loss": 1.189054250717163, + "loss_ce": 0.0030038978438824415, + "loss_iou": 0.462890625, + "loss_num": 0.052001953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 217050360, + "step": 3277 + }, + { + "epoch": 0.3068282866101933, + "grad_norm": 16.650480270385742, + "learning_rate": 5e-05, + "loss": 1.0909, + "num_input_tokens_seen": 217115916, + "step": 3278 + }, + { + "epoch": 0.3068282866101933, + "loss": 1.0408778190612793, + "loss_ce": 0.0029140515252947807, + "loss_iou": 0.396484375, + "loss_num": 0.04931640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 217115916, + "step": 3278 + }, + { + "epoch": 0.306921888894089, + "grad_norm": 12.268118858337402, + "learning_rate": 5e-05, + "loss": 1.0564, + "num_input_tokens_seen": 217182468, + "step": 3279 + }, + { + "epoch": 0.306921888894089, + "loss": 1.0205044746398926, + "loss_ce": 0.006344387773424387, + "loss_iou": 0.45703125, + "loss_num": 0.0205078125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 217182468, + "step": 3279 + }, + { + "epoch": 0.30701549117798477, + "grad_norm": 23.875041961669922, + "learning_rate": 5e-05, + "loss": 1.3049, + "num_input_tokens_seen": 217249680, + "step": 3280 + }, + { + "epoch": 0.30701549117798477, + "loss": 1.3264915943145752, + "loss_ce": 0.005690774414688349, + "loss_iou": 0.55859375, + "loss_num": 0.0400390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 217249680, + "step": 3280 + }, + { + "epoch": 0.3071090934618805, + "grad_norm": 49.74822998046875, + "learning_rate": 5e-05, + "loss": 1.6603, + "num_input_tokens_seen": 217315084, + "step": 3281 + }, + { + "epoch": 0.3071090934618805, + "loss": 1.8198919296264648, + "loss_ce": 0.005438767373561859, + "loss_iou": 0.70703125, + "loss_num": 0.07958984375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 217315084, + "step": 3281 + }, + { + "epoch": 0.3072026957457762, + "grad_norm": 26.989891052246094, + "learning_rate": 5e-05, + "loss": 1.3183, + "num_input_tokens_seen": 217381132, + "step": 3282 + }, + { + "epoch": 0.3072026957457762, + "loss": 1.295100450515747, + "loss_ce": 0.0031082441564649343, + "loss_iou": 0.56640625, + "loss_num": 0.031005859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 217381132, + "step": 3282 + }, + { + "epoch": 0.3072962980296719, + "grad_norm": 106.77875518798828, + "learning_rate": 5e-05, + "loss": 1.3803, + "num_input_tokens_seen": 217447956, + "step": 3283 + }, + { + "epoch": 0.3072962980296719, + "loss": 1.539642333984375, + "loss_ce": 0.0015564555069431663, + "loss_iou": 0.609375, + "loss_num": 0.0634765625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 217447956, + "step": 3283 + }, + { + "epoch": 0.30738990031356767, + "grad_norm": 42.692012786865234, + "learning_rate": 5e-05, + "loss": 1.3951, + "num_input_tokens_seen": 217515024, + "step": 3284 + }, + { + "epoch": 0.30738990031356767, + "loss": 1.3151333332061768, + "loss_ce": 0.006539489608258009, + "loss_iou": 0.51953125, + "loss_num": 0.0537109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 217515024, + "step": 3284 + }, + { + "epoch": 0.3074835025974634, + "grad_norm": 23.102754592895508, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 217580552, + "step": 3285 + }, + { + "epoch": 0.3074835025974634, + "loss": 1.2344200611114502, + "loss_ce": 0.00907817017287016, + "loss_iou": 0.4375, + "loss_num": 0.0703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 217580552, + "step": 3285 + }, + { + "epoch": 0.3075771048813591, + "grad_norm": 21.07726287841797, + "learning_rate": 5e-05, + "loss": 1.2692, + "num_input_tokens_seen": 217647536, + "step": 3286 + }, + { + "epoch": 0.3075771048813591, + "loss": 1.365906000137329, + "loss_ce": 0.00457786163315177, + "loss_iou": 0.546875, + "loss_num": 0.05419921875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 217647536, + "step": 3286 + }, + { + "epoch": 0.30767070716525485, + "grad_norm": 24.006851196289062, + "learning_rate": 5e-05, + "loss": 1.4339, + "num_input_tokens_seen": 217712684, + "step": 3287 + }, + { + "epoch": 0.30767070716525485, + "loss": 1.4776030778884888, + "loss_ce": 0.008731073699891567, + "loss_iou": 0.57421875, + "loss_num": 0.06396484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 217712684, + "step": 3287 + }, + { + "epoch": 0.30776430944915056, + "grad_norm": 37.91904067993164, + "learning_rate": 5e-05, + "loss": 1.3971, + "num_input_tokens_seen": 217779820, + "step": 3288 + }, + { + "epoch": 0.30776430944915056, + "loss": 1.4429993629455566, + "loss_ce": 0.007940869778394699, + "loss_iou": 0.5859375, + "loss_num": 0.052734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 217779820, + "step": 3288 + }, + { + "epoch": 0.3078579117330463, + "grad_norm": 20.647216796875, + "learning_rate": 5e-05, + "loss": 1.603, + "num_input_tokens_seen": 217846684, + "step": 3289 + }, + { + "epoch": 0.3078579117330463, + "loss": 1.3160533905029297, + "loss_ce": 0.004529984667897224, + "loss_iou": 0.56640625, + "loss_num": 0.034912109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 217846684, + "step": 3289 + }, + { + "epoch": 0.30795151401694204, + "grad_norm": 14.967142105102539, + "learning_rate": 5e-05, + "loss": 1.2879, + "num_input_tokens_seen": 217912200, + "step": 3290 + }, + { + "epoch": 0.30795151401694204, + "loss": 1.2119174003601074, + "loss_ce": 0.007815740071237087, + "loss_iou": 0.5078125, + "loss_num": 0.037353515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 217912200, + "step": 3290 + }, + { + "epoch": 0.30804511630083775, + "grad_norm": 26.79849624633789, + "learning_rate": 5e-05, + "loss": 1.4611, + "num_input_tokens_seen": 217979080, + "step": 3291 + }, + { + "epoch": 0.30804511630083775, + "loss": 1.554707646369934, + "loss_ce": 0.0063678547739982605, + "loss_iou": 0.62109375, + "loss_num": 0.06103515625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 217979080, + "step": 3291 + }, + { + "epoch": 0.30813871858473346, + "grad_norm": 28.4384822845459, + "learning_rate": 5e-05, + "loss": 1.629, + "num_input_tokens_seen": 218046008, + "step": 3292 + }, + { + "epoch": 0.30813871858473346, + "loss": 1.515143632888794, + "loss_ce": 0.006354586221277714, + "loss_iou": 0.59375, + "loss_num": 0.064453125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 218046008, + "step": 3292 + }, + { + "epoch": 0.3082323208686292, + "grad_norm": 43.775630950927734, + "learning_rate": 5e-05, + "loss": 1.4897, + "num_input_tokens_seen": 218111388, + "step": 3293 + }, + { + "epoch": 0.3082323208686292, + "loss": 1.4442451000213623, + "loss_ce": 0.006012683734297752, + "loss_iou": 0.5859375, + "loss_num": 0.052978515625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 218111388, + "step": 3293 + }, + { + "epoch": 0.30832592315252494, + "grad_norm": 23.99239158630371, + "learning_rate": 5e-05, + "loss": 1.5258, + "num_input_tokens_seen": 218176880, + "step": 3294 + }, + { + "epoch": 0.30832592315252494, + "loss": 1.530544638633728, + "loss_ce": 0.007107154466211796, + "loss_iou": 0.6484375, + "loss_num": 0.0458984375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 218176880, + "step": 3294 + }, + { + "epoch": 0.30841952543642065, + "grad_norm": 33.39753723144531, + "learning_rate": 5e-05, + "loss": 1.4145, + "num_input_tokens_seen": 218242624, + "step": 3295 + }, + { + "epoch": 0.30841952543642065, + "loss": 1.3502821922302246, + "loss_ce": 0.0021375836804509163, + "loss_iou": 0.55859375, + "loss_num": 0.0458984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 218242624, + "step": 3295 + }, + { + "epoch": 0.30851312772031636, + "grad_norm": 24.41062355041504, + "learning_rate": 5e-05, + "loss": 1.372, + "num_input_tokens_seen": 218308764, + "step": 3296 + }, + { + "epoch": 0.30851312772031636, + "loss": 1.1369571685791016, + "loss_ce": 0.008050918579101562, + "loss_iou": 0.408203125, + "loss_num": 0.062255859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 218308764, + "step": 3296 + }, + { + "epoch": 0.3086067300042121, + "grad_norm": 24.676944732666016, + "learning_rate": 5e-05, + "loss": 1.5436, + "num_input_tokens_seen": 218374720, + "step": 3297 + }, + { + "epoch": 0.3086067300042121, + "loss": 1.6637136936187744, + "loss_ce": 0.004045591689646244, + "loss_iou": 0.63671875, + "loss_num": 0.078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 218374720, + "step": 3297 + }, + { + "epoch": 0.30870033228810784, + "grad_norm": 30.753559112548828, + "learning_rate": 5e-05, + "loss": 1.2207, + "num_input_tokens_seen": 218440992, + "step": 3298 + }, + { + "epoch": 0.30870033228810784, + "loss": 1.268710970878601, + "loss_ce": 0.0050390600226819515, + "loss_iou": 0.5078125, + "loss_num": 0.049560546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 218440992, + "step": 3298 + }, + { + "epoch": 0.30879393457200355, + "grad_norm": 21.44145965576172, + "learning_rate": 5e-05, + "loss": 1.5016, + "num_input_tokens_seen": 218507732, + "step": 3299 + }, + { + "epoch": 0.30879393457200355, + "loss": 1.3323389291763306, + "loss_ce": 0.006167007610201836, + "loss_iou": 0.5703125, + "loss_num": 0.037353515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 218507732, + "step": 3299 + }, + { + "epoch": 0.30888753685589926, + "grad_norm": 36.36735534667969, + "learning_rate": 5e-05, + "loss": 1.4294, + "num_input_tokens_seen": 218573172, + "step": 3300 + }, + { + "epoch": 0.30888753685589926, + "loss": 1.4702776670455933, + "loss_ce": 0.004945619031786919, + "loss_iou": 0.58984375, + "loss_num": 0.0576171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 218573172, + "step": 3300 + }, + { + "epoch": 0.308981139139795, + "grad_norm": 24.736528396606445, + "learning_rate": 5e-05, + "loss": 1.1731, + "num_input_tokens_seen": 218638484, + "step": 3301 + }, + { + "epoch": 0.308981139139795, + "loss": 1.0958927869796753, + "loss_ce": 0.0038518088404089212, + "loss_iou": 0.423828125, + "loss_num": 0.049072265625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 218638484, + "step": 3301 + }, + { + "epoch": 0.30907474142369074, + "grad_norm": 42.26384353637695, + "learning_rate": 5e-05, + "loss": 1.405, + "num_input_tokens_seen": 218705076, + "step": 3302 + }, + { + "epoch": 0.30907474142369074, + "loss": 1.3072919845581055, + "loss_ce": 0.005045905243605375, + "loss_iou": 0.55078125, + "loss_num": 0.03955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 218705076, + "step": 3302 + }, + { + "epoch": 0.30916834370758645, + "grad_norm": 35.38228988647461, + "learning_rate": 5e-05, + "loss": 1.3409, + "num_input_tokens_seen": 218769772, + "step": 3303 + }, + { + "epoch": 0.30916834370758645, + "loss": 1.3389854431152344, + "loss_ce": 0.003292145673185587, + "loss_iou": 0.52734375, + "loss_num": 0.056396484375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 218769772, + "step": 3303 + }, + { + "epoch": 0.3092619459914822, + "grad_norm": 18.485929489135742, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 218836916, + "step": 3304 + }, + { + "epoch": 0.3092619459914822, + "loss": 1.1005358695983887, + "loss_ce": 0.006297651678323746, + "loss_iou": 0.466796875, + "loss_num": 0.032470703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 218836916, + "step": 3304 + }, + { + "epoch": 0.3093555482753779, + "grad_norm": 34.8907585144043, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 218902848, + "step": 3305 + }, + { + "epoch": 0.3093555482753779, + "loss": 1.3086987733840942, + "loss_ce": 0.00852786935865879, + "loss_iou": 0.5078125, + "loss_num": 0.057373046875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 218902848, + "step": 3305 + }, + { + "epoch": 0.30944915055927363, + "grad_norm": 23.96047592163086, + "learning_rate": 5e-05, + "loss": 1.3576, + "num_input_tokens_seen": 218968712, + "step": 3306 + }, + { + "epoch": 0.30944915055927363, + "loss": 1.3991553783416748, + "loss_ce": 0.008042062632739544, + "loss_iou": 0.625, + "loss_num": 0.0286865234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 218968712, + "step": 3306 + }, + { + "epoch": 0.3095427528431694, + "grad_norm": 19.13654136657715, + "learning_rate": 5e-05, + "loss": 1.3225, + "num_input_tokens_seen": 219035900, + "step": 3307 + }, + { + "epoch": 0.3095427528431694, + "loss": 1.3272422552108765, + "loss_ce": 0.003023592522367835, + "loss_iou": 0.5625, + "loss_num": 0.0390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 219035900, + "step": 3307 + }, + { + "epoch": 0.3096363551270651, + "grad_norm": 29.159271240234375, + "learning_rate": 5e-05, + "loss": 1.4599, + "num_input_tokens_seen": 219101832, + "step": 3308 + }, + { + "epoch": 0.3096363551270651, + "loss": 1.5176429748535156, + "loss_ce": 0.004459417425096035, + "loss_iou": 0.63671875, + "loss_num": 0.04736328125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 219101832, + "step": 3308 + }, + { + "epoch": 0.3097299574109608, + "grad_norm": 35.58757019042969, + "learning_rate": 5e-05, + "loss": 1.4017, + "num_input_tokens_seen": 219167928, + "step": 3309 + }, + { + "epoch": 0.3097299574109608, + "loss": 1.4925603866577148, + "loss_ce": 0.006476448383182287, + "loss_iou": 0.59765625, + "loss_num": 0.0576171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 219167928, + "step": 3309 + }, + { + "epoch": 0.30982355969485653, + "grad_norm": 32.41355895996094, + "learning_rate": 5e-05, + "loss": 1.8041, + "num_input_tokens_seen": 219234704, + "step": 3310 + }, + { + "epoch": 0.30982355969485653, + "loss": 1.7300525903701782, + "loss_ce": 0.005443126894533634, + "loss_iou": 0.7109375, + "loss_num": 0.06005859375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 219234704, + "step": 3310 + }, + { + "epoch": 0.3099171619787523, + "grad_norm": 16.624296188354492, + "learning_rate": 5e-05, + "loss": 1.4817, + "num_input_tokens_seen": 219300252, + "step": 3311 + }, + { + "epoch": 0.3099171619787523, + "loss": 1.4687089920043945, + "loss_ce": 0.0038653004448860884, + "loss_iou": 0.5703125, + "loss_num": 0.064453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 219300252, + "step": 3311 + }, + { + "epoch": 0.310010764262648, + "grad_norm": 17.57419776916504, + "learning_rate": 5e-05, + "loss": 1.2537, + "num_input_tokens_seen": 219366304, + "step": 3312 + }, + { + "epoch": 0.310010764262648, + "loss": 1.2607388496398926, + "loss_ce": 0.006344245281070471, + "loss_iou": 0.54296875, + "loss_num": 0.0341796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 219366304, + "step": 3312 + }, + { + "epoch": 0.3101043665465437, + "grad_norm": 34.53327560424805, + "learning_rate": 5e-05, + "loss": 1.1657, + "num_input_tokens_seen": 219431020, + "step": 3313 + }, + { + "epoch": 0.3101043665465437, + "loss": 1.180055022239685, + "loss_ce": 0.006715222727507353, + "loss_iou": 0.4375, + "loss_num": 0.0595703125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 219431020, + "step": 3313 + }, + { + "epoch": 0.3101979688304395, + "grad_norm": 43.260459899902344, + "learning_rate": 5e-05, + "loss": 1.4908, + "num_input_tokens_seen": 219496608, + "step": 3314 + }, + { + "epoch": 0.3101979688304395, + "loss": 1.4541683197021484, + "loss_ce": 0.003973029553890228, + "loss_iou": 0.55859375, + "loss_num": 0.06591796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 219496608, + "step": 3314 + }, + { + "epoch": 0.3102915711143352, + "grad_norm": 54.946163177490234, + "learning_rate": 5e-05, + "loss": 1.3666, + "num_input_tokens_seen": 219562448, + "step": 3315 + }, + { + "epoch": 0.3102915711143352, + "loss": 1.3770301342010498, + "loss_ce": 0.003983244299888611, + "loss_iou": 0.5546875, + "loss_num": 0.052734375, + "loss_xval": 1.375, + "num_input_tokens_seen": 219562448, + "step": 3315 + }, + { + "epoch": 0.3103851733982309, + "grad_norm": 23.593570709228516, + "learning_rate": 5e-05, + "loss": 1.569, + "num_input_tokens_seen": 219628164, + "step": 3316 + }, + { + "epoch": 0.3103851733982309, + "loss": 1.5762691497802734, + "loss_ce": 0.005956693552434444, + "loss_iou": 0.6875, + "loss_num": 0.038818359375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 219628164, + "step": 3316 + }, + { + "epoch": 0.3104787756821266, + "grad_norm": 19.556455612182617, + "learning_rate": 5e-05, + "loss": 1.1861, + "num_input_tokens_seen": 219694576, + "step": 3317 + }, + { + "epoch": 0.3104787756821266, + "loss": 1.2614383697509766, + "loss_ce": 0.01143829058855772, + "loss_iou": 0.46875, + "loss_num": 0.0625, + "loss_xval": 1.25, + "num_input_tokens_seen": 219694576, + "step": 3317 + }, + { + "epoch": 0.3105723779660224, + "grad_norm": 25.187734603881836, + "learning_rate": 5e-05, + "loss": 1.4119, + "num_input_tokens_seen": 219761096, + "step": 3318 + }, + { + "epoch": 0.3105723779660224, + "loss": 1.5257301330566406, + "loss_ce": 0.0022926232777535915, + "loss_iou": 0.625, + "loss_num": 0.05419921875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 219761096, + "step": 3318 + }, + { + "epoch": 0.3106659802499181, + "grad_norm": 31.622879028320312, + "learning_rate": 5e-05, + "loss": 1.571, + "num_input_tokens_seen": 219826468, + "step": 3319 + }, + { + "epoch": 0.3106659802499181, + "loss": 1.4179356098175049, + "loss_ce": 0.005826190579682589, + "loss_iou": 0.53515625, + "loss_num": 0.06787109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 219826468, + "step": 3319 + }, + { + "epoch": 0.3107595825338138, + "grad_norm": 23.731725692749023, + "learning_rate": 5e-05, + "loss": 1.3923, + "num_input_tokens_seen": 219892660, + "step": 3320 + }, + { + "epoch": 0.3107595825338138, + "loss": 1.3493430614471436, + "loss_ce": 0.00315176323056221, + "loss_iou": 0.6015625, + "loss_num": 0.0286865234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 219892660, + "step": 3320 + }, + { + "epoch": 0.31085318481770957, + "grad_norm": 32.589866638183594, + "learning_rate": 5e-05, + "loss": 1.5546, + "num_input_tokens_seen": 219958688, + "step": 3321 + }, + { + "epoch": 0.31085318481770957, + "loss": 1.2926206588745117, + "loss_ce": 0.004046455956995487, + "loss_iou": 0.515625, + "loss_num": 0.051513671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 219958688, + "step": 3321 + }, + { + "epoch": 0.3109467871016053, + "grad_norm": 18.977188110351562, + "learning_rate": 5e-05, + "loss": 1.4326, + "num_input_tokens_seen": 220025348, + "step": 3322 + }, + { + "epoch": 0.3109467871016053, + "loss": 1.5848474502563477, + "loss_ce": 0.00867548305541277, + "loss_iou": 0.66796875, + "loss_num": 0.04736328125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 220025348, + "step": 3322 + }, + { + "epoch": 0.311040389385501, + "grad_norm": 20.86507797241211, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 220091376, + "step": 3323 + }, + { + "epoch": 0.311040389385501, + "loss": 1.0473569631576538, + "loss_ce": 0.004266200587153435, + "loss_iou": 0.396484375, + "loss_num": 0.0498046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 220091376, + "step": 3323 + }, + { + "epoch": 0.31113399166939676, + "grad_norm": 20.868701934814453, + "learning_rate": 5e-05, + "loss": 1.4512, + "num_input_tokens_seen": 220156540, + "step": 3324 + }, + { + "epoch": 0.31113399166939676, + "loss": 1.4235413074493408, + "loss_ce": 0.0065492019057273865, + "loss_iou": 0.56640625, + "loss_num": 0.056396484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 220156540, + "step": 3324 + }, + { + "epoch": 0.31122759395329247, + "grad_norm": 62.60850524902344, + "learning_rate": 5e-05, + "loss": 1.519, + "num_input_tokens_seen": 220223020, + "step": 3325 + }, + { + "epoch": 0.31122759395329247, + "loss": 1.3537803888320923, + "loss_ce": 0.009053874760866165, + "loss_iou": 0.5625, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 220223020, + "step": 3325 + }, + { + "epoch": 0.3113211962371882, + "grad_norm": 27.41938018798828, + "learning_rate": 5e-05, + "loss": 1.4091, + "num_input_tokens_seen": 220290112, + "step": 3326 + }, + { + "epoch": 0.3113211962371882, + "loss": 1.1774846315383911, + "loss_ce": 0.005121318623423576, + "loss_iou": 0.515625, + "loss_num": 0.028076171875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 220290112, + "step": 3326 + }, + { + "epoch": 0.3114147985210839, + "grad_norm": 22.340200424194336, + "learning_rate": 5e-05, + "loss": 1.2139, + "num_input_tokens_seen": 220356420, + "step": 3327 + }, + { + "epoch": 0.3114147985210839, + "loss": 1.0492682456970215, + "loss_ce": 0.004102273844182491, + "loss_iou": 0.41796875, + "loss_num": 0.041748046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 220356420, + "step": 3327 + }, + { + "epoch": 0.31150840080497966, + "grad_norm": 16.401081085205078, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 220423168, + "step": 3328 + }, + { + "epoch": 0.31150840080497966, + "loss": 1.5529004335403442, + "loss_ce": 0.007734448648989201, + "loss_iou": 0.52734375, + "loss_num": 0.09814453125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 220423168, + "step": 3328 + }, + { + "epoch": 0.31160200308887537, + "grad_norm": 34.76987075805664, + "learning_rate": 5e-05, + "loss": 1.1955, + "num_input_tokens_seen": 220488936, + "step": 3329 + }, + { + "epoch": 0.31160200308887537, + "loss": 1.1450514793395996, + "loss_ce": 0.0068679628893733025, + "loss_iou": 0.484375, + "loss_num": 0.033935546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 220488936, + "step": 3329 + }, + { + "epoch": 0.3116956053727711, + "grad_norm": 18.02237319946289, + "learning_rate": 5e-05, + "loss": 1.3261, + "num_input_tokens_seen": 220555808, + "step": 3330 + }, + { + "epoch": 0.3116956053727711, + "loss": 1.1664884090423584, + "loss_ce": 0.004897744860500097, + "loss_iou": 0.484375, + "loss_num": 0.0380859375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 220555808, + "step": 3330 + }, + { + "epoch": 0.31178920765666684, + "grad_norm": 39.48426818847656, + "learning_rate": 5e-05, + "loss": 1.3595, + "num_input_tokens_seen": 220621040, + "step": 3331 + }, + { + "epoch": 0.31178920765666684, + "loss": 1.3606083393096924, + "loss_ce": 0.009045885875821114, + "loss_iou": 0.55078125, + "loss_num": 0.0498046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 220621040, + "step": 3331 + }, + { + "epoch": 0.31188280994056256, + "grad_norm": 29.691120147705078, + "learning_rate": 5e-05, + "loss": 1.3407, + "num_input_tokens_seen": 220687548, + "step": 3332 + }, + { + "epoch": 0.31188280994056256, + "loss": 1.0321433544158936, + "loss_ce": 0.0018699432257562876, + "loss_iou": 0.45703125, + "loss_num": 0.023193359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 220687548, + "step": 3332 + }, + { + "epoch": 0.31197641222445827, + "grad_norm": 27.59664535522461, + "learning_rate": 5e-05, + "loss": 1.1249, + "num_input_tokens_seen": 220754168, + "step": 3333 + }, + { + "epoch": 0.31197641222445827, + "loss": 1.1480791568756104, + "loss_ce": 0.006965956185013056, + "loss_iou": 0.431640625, + "loss_num": 0.055908203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 220754168, + "step": 3333 + }, + { + "epoch": 0.312070014508354, + "grad_norm": 26.381746292114258, + "learning_rate": 5e-05, + "loss": 1.5325, + "num_input_tokens_seen": 220819884, + "step": 3334 + }, + { + "epoch": 0.312070014508354, + "loss": 1.3252267837524414, + "loss_ce": 0.003937665373086929, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 220819884, + "step": 3334 + }, + { + "epoch": 0.31216361679224974, + "grad_norm": 23.994365692138672, + "learning_rate": 5e-05, + "loss": 1.1033, + "num_input_tokens_seen": 220885232, + "step": 3335 + }, + { + "epoch": 0.31216361679224974, + "loss": 1.0954842567443848, + "loss_ce": 0.008081864565610886, + "loss_iou": 0.392578125, + "loss_num": 0.060302734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 220885232, + "step": 3335 + }, + { + "epoch": 0.31225721907614545, + "grad_norm": 28.305471420288086, + "learning_rate": 5e-05, + "loss": 1.4438, + "num_input_tokens_seen": 220951272, + "step": 3336 + }, + { + "epoch": 0.31225721907614545, + "loss": 1.2502145767211914, + "loss_ce": 0.009003687649965286, + "loss_iou": 0.515625, + "loss_num": 0.042236328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 220951272, + "step": 3336 + }, + { + "epoch": 0.31235082136004116, + "grad_norm": 35.367069244384766, + "learning_rate": 5e-05, + "loss": 1.3877, + "num_input_tokens_seen": 221017748, + "step": 3337 + }, + { + "epoch": 0.31235082136004116, + "loss": 1.3979169130325317, + "loss_ce": 0.008268485777080059, + "loss_iou": 0.53515625, + "loss_num": 0.06494140625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 221017748, + "step": 3337 + }, + { + "epoch": 0.31244442364393693, + "grad_norm": 19.60552978515625, + "learning_rate": 5e-05, + "loss": 1.4168, + "num_input_tokens_seen": 221083068, + "step": 3338 + }, + { + "epoch": 0.31244442364393693, + "loss": 1.6001744270324707, + "loss_ce": 0.008133322931826115, + "loss_iou": 0.64453125, + "loss_num": 0.060546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 221083068, + "step": 3338 + }, + { + "epoch": 0.31253802592783264, + "grad_norm": 52.10533905029297, + "learning_rate": 5e-05, + "loss": 1.4384, + "num_input_tokens_seen": 221149408, + "step": 3339 + }, + { + "epoch": 0.31253802592783264, + "loss": 1.5534617900848389, + "loss_ce": 0.007807568646967411, + "loss_iou": 0.58984375, + "loss_num": 0.0732421875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 221149408, + "step": 3339 + }, + { + "epoch": 0.31263162821172835, + "grad_norm": 29.24900245666504, + "learning_rate": 5e-05, + "loss": 1.48, + "num_input_tokens_seen": 221215192, + "step": 3340 + }, + { + "epoch": 0.31263162821172835, + "loss": 1.3427841663360596, + "loss_ce": 0.0053817350417375565, + "loss_iou": 0.55078125, + "loss_num": 0.04736328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 221215192, + "step": 3340 + }, + { + "epoch": 0.3127252304956241, + "grad_norm": 23.47211456298828, + "learning_rate": 5e-05, + "loss": 1.5811, + "num_input_tokens_seen": 221279480, + "step": 3341 + }, + { + "epoch": 0.3127252304956241, + "loss": 1.5039896965026855, + "loss_ce": 0.004355897195637226, + "loss_iou": 0.6015625, + "loss_num": 0.0595703125, + "loss_xval": 1.5, + "num_input_tokens_seen": 221279480, + "step": 3341 + }, + { + "epoch": 0.31281883277951983, + "grad_norm": 17.825998306274414, + "learning_rate": 5e-05, + "loss": 1.3731, + "num_input_tokens_seen": 221345552, + "step": 3342 + }, + { + "epoch": 0.31281883277951983, + "loss": 1.1372429132461548, + "loss_ce": 0.0061393496580421925, + "loss_iou": 0.48046875, + "loss_num": 0.033935546875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 221345552, + "step": 3342 + }, + { + "epoch": 0.31291243506341554, + "grad_norm": 30.159902572631836, + "learning_rate": 5e-05, + "loss": 1.453, + "num_input_tokens_seen": 221411564, + "step": 3343 + }, + { + "epoch": 0.31291243506341554, + "loss": 1.4636118412017822, + "loss_ce": 0.005115772597491741, + "loss_iou": 0.58984375, + "loss_num": 0.05517578125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 221411564, + "step": 3343 + }, + { + "epoch": 0.31300603734731125, + "grad_norm": 26.943056106567383, + "learning_rate": 5e-05, + "loss": 1.3874, + "num_input_tokens_seen": 221478344, + "step": 3344 + }, + { + "epoch": 0.31300603734731125, + "loss": 1.2727259397506714, + "loss_ce": 0.007589232176542282, + "loss_iou": 0.53125, + "loss_num": 0.041015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 221478344, + "step": 3344 + }, + { + "epoch": 0.313099639631207, + "grad_norm": 38.17118453979492, + "learning_rate": 5e-05, + "loss": 1.5091, + "num_input_tokens_seen": 221543836, + "step": 3345 + }, + { + "epoch": 0.313099639631207, + "loss": 1.5436885356903076, + "loss_ce": 0.0056026773527264595, + "loss_iou": 0.67578125, + "loss_num": 0.03662109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 221543836, + "step": 3345 + }, + { + "epoch": 0.3131932419151027, + "grad_norm": 23.127803802490234, + "learning_rate": 5e-05, + "loss": 1.1563, + "num_input_tokens_seen": 221609172, + "step": 3346 + }, + { + "epoch": 0.3131932419151027, + "loss": 1.0623043775558472, + "loss_ce": 0.003710642922669649, + "loss_iou": 0.4375, + "loss_num": 0.037353515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 221609172, + "step": 3346 + }, + { + "epoch": 0.31328684419899844, + "grad_norm": 105.68035888671875, + "learning_rate": 5e-05, + "loss": 1.3641, + "num_input_tokens_seen": 221675368, + "step": 3347 + }, + { + "epoch": 0.31328684419899844, + "loss": 1.3974735736846924, + "loss_ce": 0.0029422855004668236, + "loss_iou": 0.55859375, + "loss_num": 0.054931640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 221675368, + "step": 3347 + }, + { + "epoch": 0.3133804464828942, + "grad_norm": 19.81547737121582, + "learning_rate": 5e-05, + "loss": 1.36, + "num_input_tokens_seen": 221741076, + "step": 3348 + }, + { + "epoch": 0.3133804464828942, + "loss": 1.4273831844329834, + "loss_ce": 0.00648471899330616, + "loss_iou": 0.57421875, + "loss_num": 0.05517578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 221741076, + "step": 3348 + }, + { + "epoch": 0.3134740487667899, + "grad_norm": 77.74214935302734, + "learning_rate": 5e-05, + "loss": 1.3075, + "num_input_tokens_seen": 221807116, + "step": 3349 + }, + { + "epoch": 0.3134740487667899, + "loss": 1.2954914569854736, + "loss_ce": 0.004841960500925779, + "loss_iou": 0.546875, + "loss_num": 0.0390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 221807116, + "step": 3349 + }, + { + "epoch": 0.3135676510506856, + "grad_norm": 34.61111831665039, + "learning_rate": 5e-05, + "loss": 1.272, + "num_input_tokens_seen": 221873352, + "step": 3350 + }, + { + "epoch": 0.3135676510506856, + "loss": 1.359041690826416, + "loss_ce": 0.004549413453787565, + "loss_iou": 0.5546875, + "loss_num": 0.04833984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 221873352, + "step": 3350 + }, + { + "epoch": 0.3136612533345814, + "grad_norm": 19.94378662109375, + "learning_rate": 5e-05, + "loss": 1.5089, + "num_input_tokens_seen": 221938976, + "step": 3351 + }, + { + "epoch": 0.3136612533345814, + "loss": 1.492689847946167, + "loss_ce": 0.0034320466220378876, + "loss_iou": 0.63671875, + "loss_num": 0.042236328125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 221938976, + "step": 3351 + }, + { + "epoch": 0.3137548556184771, + "grad_norm": 37.82259750366211, + "learning_rate": 5e-05, + "loss": 1.2216, + "num_input_tokens_seen": 222005316, + "step": 3352 + }, + { + "epoch": 0.3137548556184771, + "loss": 1.036719799041748, + "loss_ce": 0.003608190920203924, + "loss_iou": 0.400390625, + "loss_num": 0.04638671875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 222005316, + "step": 3352 + }, + { + "epoch": 0.3138484579023728, + "grad_norm": 22.122650146484375, + "learning_rate": 5e-05, + "loss": 0.9921, + "num_input_tokens_seen": 222070772, + "step": 3353 + }, + { + "epoch": 0.3138484579023728, + "loss": 0.9437713623046875, + "loss_ce": 0.0027923593297600746, + "loss_iou": 0.373046875, + "loss_num": 0.038818359375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 222070772, + "step": 3353 + }, + { + "epoch": 0.3139420601862685, + "grad_norm": 58.61040115356445, + "learning_rate": 5e-05, + "loss": 1.3761, + "num_input_tokens_seen": 222135596, + "step": 3354 + }, + { + "epoch": 0.3139420601862685, + "loss": 1.4548401832580566, + "loss_ce": 0.004065017215907574, + "loss_iou": 0.62890625, + "loss_num": 0.0390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 222135596, + "step": 3354 + }, + { + "epoch": 0.3140356624701643, + "grad_norm": 22.35420036315918, + "learning_rate": 5e-05, + "loss": 1.2741, + "num_input_tokens_seen": 222200868, + "step": 3355 + }, + { + "epoch": 0.3140356624701643, + "loss": 1.1652780771255493, + "loss_ce": 0.0026804166845977306, + "loss_iou": 0.41796875, + "loss_num": 0.0654296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 222200868, + "step": 3355 + }, + { + "epoch": 0.31412926475406, + "grad_norm": 61.23613739013672, + "learning_rate": 5e-05, + "loss": 1.5002, + "num_input_tokens_seen": 222267444, + "step": 3356 + }, + { + "epoch": 0.31412926475406, + "loss": 1.6423583030700684, + "loss_ce": 0.00710450392216444, + "loss_iou": 0.65234375, + "loss_num": 0.06689453125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 222267444, + "step": 3356 + }, + { + "epoch": 0.3142228670379557, + "grad_norm": 32.940921783447266, + "learning_rate": 5e-05, + "loss": 1.4766, + "num_input_tokens_seen": 222333788, + "step": 3357 + }, + { + "epoch": 0.3142228670379557, + "loss": 1.3237287998199463, + "loss_ce": 0.006346018519252539, + "loss_iou": 0.5390625, + "loss_num": 0.04833984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 222333788, + "step": 3357 + }, + { + "epoch": 0.3143164693218515, + "grad_norm": 20.063535690307617, + "learning_rate": 5e-05, + "loss": 1.5623, + "num_input_tokens_seen": 222400256, + "step": 3358 + }, + { + "epoch": 0.3143164693218515, + "loss": 1.6014404296875, + "loss_ce": 0.005004804581403732, + "loss_iou": 0.640625, + "loss_num": 0.06298828125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 222400256, + "step": 3358 + }, + { + "epoch": 0.3144100716057472, + "grad_norm": 17.425281524658203, + "learning_rate": 5e-05, + "loss": 1.1413, + "num_input_tokens_seen": 222465900, + "step": 3359 + }, + { + "epoch": 0.3144100716057472, + "loss": 1.0840343236923218, + "loss_ce": 0.00542107131332159, + "loss_iou": 0.45703125, + "loss_num": 0.032958984375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 222465900, + "step": 3359 + }, + { + "epoch": 0.3145036738896429, + "grad_norm": 20.119585037231445, + "learning_rate": 5e-05, + "loss": 1.2671, + "num_input_tokens_seen": 222532788, + "step": 3360 + }, + { + "epoch": 0.3145036738896429, + "loss": 1.2474873065948486, + "loss_ce": 0.0048115793615579605, + "loss_iou": 0.51953125, + "loss_num": 0.040283203125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 222532788, + "step": 3360 + }, + { + "epoch": 0.3145972761735386, + "grad_norm": 15.529327392578125, + "learning_rate": 5e-05, + "loss": 1.4399, + "num_input_tokens_seen": 222599480, + "step": 3361 + }, + { + "epoch": 0.3145972761735386, + "loss": 1.5852491855621338, + "loss_ce": 0.0022413067054003477, + "loss_iou": 0.6171875, + "loss_num": 0.0703125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 222599480, + "step": 3361 + }, + { + "epoch": 0.3146908784574344, + "grad_norm": 17.559532165527344, + "learning_rate": 5e-05, + "loss": 1.1172, + "num_input_tokens_seen": 222665860, + "step": 3362 + }, + { + "epoch": 0.3146908784574344, + "loss": 1.0267374515533447, + "loss_ce": 0.002079309429973364, + "loss_iou": 0.416015625, + "loss_num": 0.0380859375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 222665860, + "step": 3362 + }, + { + "epoch": 0.3147844807413301, + "grad_norm": 24.45988655090332, + "learning_rate": 5e-05, + "loss": 1.4585, + "num_input_tokens_seen": 222731708, + "step": 3363 + }, + { + "epoch": 0.3147844807413301, + "loss": 1.4786128997802734, + "loss_ce": 0.0044917454943060875, + "loss_iou": 0.59375, + "loss_num": 0.056396484375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 222731708, + "step": 3363 + }, + { + "epoch": 0.3148780830252258, + "grad_norm": 37.90068054199219, + "learning_rate": 5e-05, + "loss": 1.3609, + "num_input_tokens_seen": 222798276, + "step": 3364 + }, + { + "epoch": 0.3148780830252258, + "loss": 1.4048511981964111, + "loss_ce": 0.007390298880636692, + "loss_iou": 0.59765625, + "loss_num": 0.04150390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 222798276, + "step": 3364 + }, + { + "epoch": 0.31497168530912156, + "grad_norm": 80.03910827636719, + "learning_rate": 5e-05, + "loss": 1.4927, + "num_input_tokens_seen": 222864148, + "step": 3365 + }, + { + "epoch": 0.31497168530912156, + "loss": 1.597489595413208, + "loss_ce": 0.0017864289693534374, + "loss_iou": 0.6953125, + "loss_num": 0.040283203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 222864148, + "step": 3365 + }, + { + "epoch": 0.3150652875930173, + "grad_norm": 18.732847213745117, + "learning_rate": 5e-05, + "loss": 1.4418, + "num_input_tokens_seen": 222929604, + "step": 3366 + }, + { + "epoch": 0.3150652875930173, + "loss": 1.1654069423675537, + "loss_ce": 0.002809304278343916, + "loss_iou": 0.400390625, + "loss_num": 0.072265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 222929604, + "step": 3366 + }, + { + "epoch": 0.315158889876913, + "grad_norm": 24.935232162475586, + "learning_rate": 5e-05, + "loss": 1.3883, + "num_input_tokens_seen": 222996012, + "step": 3367 + }, + { + "epoch": 0.315158889876913, + "loss": 1.4018690586090088, + "loss_ce": 0.004896492697298527, + "loss_iou": 0.546875, + "loss_num": 0.06103515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 222996012, + "step": 3367 + }, + { + "epoch": 0.31525249216080875, + "grad_norm": 18.91539764404297, + "learning_rate": 5e-05, + "loss": 1.5116, + "num_input_tokens_seen": 223061952, + "step": 3368 + }, + { + "epoch": 0.31525249216080875, + "loss": 1.7186686992645264, + "loss_ce": 0.00480151642113924, + "loss_iou": 0.70703125, + "loss_num": 0.059326171875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 223061952, + "step": 3368 + }, + { + "epoch": 0.31534609444470446, + "grad_norm": 32.9536247253418, + "learning_rate": 5e-05, + "loss": 1.2714, + "num_input_tokens_seen": 223128572, + "step": 3369 + }, + { + "epoch": 0.31534609444470446, + "loss": 1.2080504894256592, + "loss_ce": 0.0063903434202075005, + "loss_iou": 0.5234375, + "loss_num": 0.030517578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 223128572, + "step": 3369 + }, + { + "epoch": 0.31543969672860017, + "grad_norm": 23.890426635742188, + "learning_rate": 5e-05, + "loss": 1.3285, + "num_input_tokens_seen": 223195320, + "step": 3370 + }, + { + "epoch": 0.31543969672860017, + "loss": 1.3790334463119507, + "loss_ce": 0.0064747813157737255, + "loss_iou": 0.60546875, + "loss_num": 0.03271484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 223195320, + "step": 3370 + }, + { + "epoch": 0.3155332990124959, + "grad_norm": 21.61827850341797, + "learning_rate": 5e-05, + "loss": 1.1121, + "num_input_tokens_seen": 223263108, + "step": 3371 + }, + { + "epoch": 0.3155332990124959, + "loss": 1.2329776287078857, + "loss_ce": 0.0034855089616030455, + "loss_iou": 0.546875, + "loss_num": 0.0264892578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 223263108, + "step": 3371 + }, + { + "epoch": 0.31562690129639165, + "grad_norm": 16.282045364379883, + "learning_rate": 5e-05, + "loss": 1.0576, + "num_input_tokens_seen": 223329144, + "step": 3372 + }, + { + "epoch": 0.31562690129639165, + "loss": 0.9404612183570862, + "loss_ce": 0.0044260770082473755, + "loss_iou": 0.38671875, + "loss_num": 0.032958984375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 223329144, + "step": 3372 + }, + { + "epoch": 0.31572050358028736, + "grad_norm": 24.378602981567383, + "learning_rate": 5e-05, + "loss": 1.4535, + "num_input_tokens_seen": 223395040, + "step": 3373 + }, + { + "epoch": 0.31572050358028736, + "loss": 1.4120330810546875, + "loss_ce": 0.008712712675333023, + "loss_iou": 0.55078125, + "loss_num": 0.060546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 223395040, + "step": 3373 + }, + { + "epoch": 0.31581410586418307, + "grad_norm": 37.420528411865234, + "learning_rate": 5e-05, + "loss": 1.4691, + "num_input_tokens_seen": 223460284, + "step": 3374 + }, + { + "epoch": 0.31581410586418307, + "loss": 1.5664467811584473, + "loss_ce": 0.01078267302364111, + "loss_iou": 0.6484375, + "loss_num": 0.0517578125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 223460284, + "step": 3374 + }, + { + "epoch": 0.31590770814807884, + "grad_norm": 20.938661575317383, + "learning_rate": 5e-05, + "loss": 1.5368, + "num_input_tokens_seen": 223528588, + "step": 3375 + }, + { + "epoch": 0.31590770814807884, + "loss": 1.5774849653244019, + "loss_ce": 0.008148998953402042, + "loss_iou": 0.6484375, + "loss_num": 0.0546875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 223528588, + "step": 3375 + }, + { + "epoch": 0.31600131043197455, + "grad_norm": 29.081159591674805, + "learning_rate": 5e-05, + "loss": 1.325, + "num_input_tokens_seen": 223594368, + "step": 3376 + }, + { + "epoch": 0.31600131043197455, + "loss": 1.1612496376037598, + "loss_ce": 0.0054878611117601395, + "loss_iou": 0.486328125, + "loss_num": 0.03662109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 223594368, + "step": 3376 + }, + { + "epoch": 0.31609491271587026, + "grad_norm": 15.730676651000977, + "learning_rate": 5e-05, + "loss": 1.2645, + "num_input_tokens_seen": 223660236, + "step": 3377 + }, + { + "epoch": 0.31609491271587026, + "loss": 1.3302812576293945, + "loss_ce": 0.005086025223135948, + "loss_iou": 0.546875, + "loss_num": 0.046142578125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 223660236, + "step": 3377 + }, + { + "epoch": 0.31618851499976597, + "grad_norm": 28.429227828979492, + "learning_rate": 5e-05, + "loss": 1.2494, + "num_input_tokens_seen": 223726984, + "step": 3378 + }, + { + "epoch": 0.31618851499976597, + "loss": 1.4166882038116455, + "loss_ce": 0.00653198454529047, + "loss_iou": 0.59765625, + "loss_num": 0.043701171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 223726984, + "step": 3378 + }, + { + "epoch": 0.31628211728366173, + "grad_norm": 27.458473205566406, + "learning_rate": 5e-05, + "loss": 1.521, + "num_input_tokens_seen": 223795444, + "step": 3379 + }, + { + "epoch": 0.31628211728366173, + "loss": 1.5506752729415894, + "loss_ce": 0.008683143183588982, + "loss_iou": 0.625, + "loss_num": 0.057861328125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 223795444, + "step": 3379 + }, + { + "epoch": 0.31637571956755745, + "grad_norm": 40.49128723144531, + "learning_rate": 5e-05, + "loss": 1.357, + "num_input_tokens_seen": 223860836, + "step": 3380 + }, + { + "epoch": 0.31637571956755745, + "loss": 1.21141517162323, + "loss_ce": 0.0024307710118591785, + "loss_iou": 0.484375, + "loss_num": 0.047607421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 223860836, + "step": 3380 + }, + { + "epoch": 0.31646932185145316, + "grad_norm": 21.579544067382812, + "learning_rate": 5e-05, + "loss": 1.4706, + "num_input_tokens_seen": 223927608, + "step": 3381 + }, + { + "epoch": 0.31646932185145316, + "loss": 1.4991039037704468, + "loss_ce": 0.008869605138897896, + "loss_iou": 0.6640625, + "loss_num": 0.0322265625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 223927608, + "step": 3381 + }, + { + "epoch": 0.3165629241353489, + "grad_norm": 18.886295318603516, + "learning_rate": 5e-05, + "loss": 1.3051, + "num_input_tokens_seen": 223994040, + "step": 3382 + }, + { + "epoch": 0.3165629241353489, + "loss": 1.3103877305984497, + "loss_ce": 0.003258854616433382, + "loss_iou": 0.5, + "loss_num": 0.061279296875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 223994040, + "step": 3382 + }, + { + "epoch": 0.31665652641924463, + "grad_norm": 16.55956268310547, + "learning_rate": 5e-05, + "loss": 1.0806, + "num_input_tokens_seen": 224059484, + "step": 3383 + }, + { + "epoch": 0.31665652641924463, + "loss": 1.2830233573913574, + "loss_ce": 0.00372649310156703, + "loss_iou": 0.515625, + "loss_num": 0.050048828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 224059484, + "step": 3383 + }, + { + "epoch": 0.31675012870314034, + "grad_norm": 29.22968101501465, + "learning_rate": 5e-05, + "loss": 1.4675, + "num_input_tokens_seen": 224125676, + "step": 3384 + }, + { + "epoch": 0.31675012870314034, + "loss": 1.3845795392990112, + "loss_ce": 0.004208347760140896, + "loss_iou": 0.5546875, + "loss_num": 0.054443359375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 224125676, + "step": 3384 + }, + { + "epoch": 0.3168437309870361, + "grad_norm": 45.01770782470703, + "learning_rate": 5e-05, + "loss": 1.7641, + "num_input_tokens_seen": 224192464, + "step": 3385 + }, + { + "epoch": 0.3168437309870361, + "loss": 1.734407663345337, + "loss_ce": 0.0044272299855947495, + "loss_iou": 0.703125, + "loss_num": 0.06396484375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 224192464, + "step": 3385 + }, + { + "epoch": 0.3169373332709318, + "grad_norm": 131.20025634765625, + "learning_rate": 5e-05, + "loss": 1.6422, + "num_input_tokens_seen": 224259108, + "step": 3386 + }, + { + "epoch": 0.3169373332709318, + "loss": 1.508176326751709, + "loss_ce": 0.005246670916676521, + "loss_iou": 0.6171875, + "loss_num": 0.05322265625, + "loss_xval": 1.5, + "num_input_tokens_seen": 224259108, + "step": 3386 + }, + { + "epoch": 0.31703093555482753, + "grad_norm": 27.698530197143555, + "learning_rate": 5e-05, + "loss": 1.5159, + "num_input_tokens_seen": 224325392, + "step": 3387 + }, + { + "epoch": 0.31703093555482753, + "loss": 1.4979016780853271, + "loss_ce": 0.003272692672908306, + "loss_iou": 0.6015625, + "loss_num": 0.05810546875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 224325392, + "step": 3387 + }, + { + "epoch": 0.31712453783872324, + "grad_norm": 44.2696418762207, + "learning_rate": 5e-05, + "loss": 1.4865, + "num_input_tokens_seen": 224391304, + "step": 3388 + }, + { + "epoch": 0.31712453783872324, + "loss": 1.6612162590026855, + "loss_ce": 0.004966217093169689, + "loss_iou": 0.64453125, + "loss_num": 0.07373046875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 224391304, + "step": 3388 + }, + { + "epoch": 0.317218140122619, + "grad_norm": 42.36366271972656, + "learning_rate": 5e-05, + "loss": 1.5931, + "num_input_tokens_seen": 224457064, + "step": 3389 + }, + { + "epoch": 0.317218140122619, + "loss": 1.5112340450286865, + "loss_ce": 0.004886340349912643, + "loss_iou": 0.5546875, + "loss_num": 0.07958984375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 224457064, + "step": 3389 + }, + { + "epoch": 0.3173117424065147, + "grad_norm": 17.78668975830078, + "learning_rate": 5e-05, + "loss": 1.3423, + "num_input_tokens_seen": 224523372, + "step": 3390 + }, + { + "epoch": 0.3173117424065147, + "loss": 1.2294988632202148, + "loss_ce": 0.0044012125581502914, + "loss_iou": 0.53515625, + "loss_num": 0.031494140625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 224523372, + "step": 3390 + }, + { + "epoch": 0.31740534469041043, + "grad_norm": 35.49195098876953, + "learning_rate": 5e-05, + "loss": 1.2822, + "num_input_tokens_seen": 224590064, + "step": 3391 + }, + { + "epoch": 0.31740534469041043, + "loss": 1.2628147602081299, + "loss_ce": 0.00805398728698492, + "loss_iou": 0.486328125, + "loss_num": 0.056884765625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 224590064, + "step": 3391 + }, + { + "epoch": 0.3174989469743062, + "grad_norm": 29.660625457763672, + "learning_rate": 5e-05, + "loss": 1.7155, + "num_input_tokens_seen": 224656460, + "step": 3392 + }, + { + "epoch": 0.3174989469743062, + "loss": 1.8898135423660278, + "loss_ce": 0.006024497095495462, + "loss_iou": 0.7890625, + "loss_num": 0.061767578125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 224656460, + "step": 3392 + }, + { + "epoch": 0.3175925492582019, + "grad_norm": 33.64856719970703, + "learning_rate": 5e-05, + "loss": 1.2783, + "num_input_tokens_seen": 224722244, + "step": 3393 + }, + { + "epoch": 0.3175925492582019, + "loss": 1.1382837295532227, + "loss_ce": 0.010598192922770977, + "loss_iou": 0.419921875, + "loss_num": 0.0576171875, + "loss_xval": 1.125, + "num_input_tokens_seen": 224722244, + "step": 3393 + }, + { + "epoch": 0.3176861515420976, + "grad_norm": 62.0196418762207, + "learning_rate": 5e-05, + "loss": 1.2391, + "num_input_tokens_seen": 224788924, + "step": 3394 + }, + { + "epoch": 0.3176861515420976, + "loss": 1.3180869817733765, + "loss_ce": 0.004122164100408554, + "loss_iou": 0.5625, + "loss_num": 0.037841796875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 224788924, + "step": 3394 + }, + { + "epoch": 0.3177797538259933, + "grad_norm": 62.28541564941406, + "learning_rate": 5e-05, + "loss": 1.5308, + "num_input_tokens_seen": 224855448, + "step": 3395 + }, + { + "epoch": 0.3177797538259933, + "loss": 1.6843470335006714, + "loss_ce": 0.003682971466332674, + "loss_iou": 0.65234375, + "loss_num": 0.07470703125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 224855448, + "step": 3395 + }, + { + "epoch": 0.3178733561098891, + "grad_norm": 50.235809326171875, + "learning_rate": 5e-05, + "loss": 1.5338, + "num_input_tokens_seen": 224920948, + "step": 3396 + }, + { + "epoch": 0.3178733561098891, + "loss": 1.6580981016159058, + "loss_ce": 0.007707488723099232, + "loss_iou": 0.66015625, + "loss_num": 0.06494140625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 224920948, + "step": 3396 + }, + { + "epoch": 0.3179669583937848, + "grad_norm": 23.435644149780273, + "learning_rate": 5e-05, + "loss": 1.6499, + "num_input_tokens_seen": 224986096, + "step": 3397 + }, + { + "epoch": 0.3179669583937848, + "loss": 1.7257496118545532, + "loss_ce": 0.007487837225198746, + "loss_iou": 0.72265625, + "loss_num": 0.055419921875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 224986096, + "step": 3397 + }, + { + "epoch": 0.3180605606776805, + "grad_norm": 16.558685302734375, + "learning_rate": 5e-05, + "loss": 1.2941, + "num_input_tokens_seen": 225051804, + "step": 3398 + }, + { + "epoch": 0.3180605606776805, + "loss": 1.201046347618103, + "loss_ce": 0.004757278133183718, + "loss_iou": 0.49609375, + "loss_num": 0.040771484375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 225051804, + "step": 3398 + }, + { + "epoch": 0.3181541629615763, + "grad_norm": 35.21195602416992, + "learning_rate": 5e-05, + "loss": 1.3751, + "num_input_tokens_seen": 225117612, + "step": 3399 + }, + { + "epoch": 0.3181541629615763, + "loss": 1.3540635108947754, + "loss_ce": 0.0073838382959365845, + "loss_iou": 0.5078125, + "loss_num": 0.06689453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 225117612, + "step": 3399 + }, + { + "epoch": 0.318247765245472, + "grad_norm": 26.134944915771484, + "learning_rate": 5e-05, + "loss": 1.3678, + "num_input_tokens_seen": 225184460, + "step": 3400 + }, + { + "epoch": 0.318247765245472, + "loss": 1.3506264686584473, + "loss_ce": 0.0034585148096084595, + "loss_iou": 0.59375, + "loss_num": 0.031494140625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 225184460, + "step": 3400 + }, + { + "epoch": 0.3183413675293677, + "grad_norm": 36.55769729614258, + "learning_rate": 5e-05, + "loss": 1.3126, + "num_input_tokens_seen": 225251700, + "step": 3401 + }, + { + "epoch": 0.3183413675293677, + "loss": 1.3989133834838867, + "loss_ce": 0.011218111030757427, + "loss_iou": 0.53515625, + "loss_num": 0.06396484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 225251700, + "step": 3401 + }, + { + "epoch": 0.31843496981326347, + "grad_norm": 22.841083526611328, + "learning_rate": 5e-05, + "loss": 1.2714, + "num_input_tokens_seen": 225317468, + "step": 3402 + }, + { + "epoch": 0.31843496981326347, + "loss": 1.4070237874984741, + "loss_ce": 0.006633143872022629, + "loss_iou": 0.57421875, + "loss_num": 0.05126953125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 225317468, + "step": 3402 + }, + { + "epoch": 0.3185285720971592, + "grad_norm": 23.36016845703125, + "learning_rate": 5e-05, + "loss": 1.4898, + "num_input_tokens_seen": 225383468, + "step": 3403 + }, + { + "epoch": 0.3185285720971592, + "loss": 1.3079955577850342, + "loss_ce": 0.007549961097538471, + "loss_iou": 0.51953125, + "loss_num": 0.052734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 225383468, + "step": 3403 + }, + { + "epoch": 0.3186221743810549, + "grad_norm": 30.088396072387695, + "learning_rate": 5e-05, + "loss": 1.5034, + "num_input_tokens_seen": 225449732, + "step": 3404 + }, + { + "epoch": 0.3186221743810549, + "loss": 1.3973443508148193, + "loss_ce": 0.004278029780834913, + "loss_iou": 0.5625, + "loss_num": 0.05419921875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 225449732, + "step": 3404 + }, + { + "epoch": 0.3187157766649506, + "grad_norm": 20.8262882232666, + "learning_rate": 5e-05, + "loss": 1.4677, + "num_input_tokens_seen": 225515332, + "step": 3405 + }, + { + "epoch": 0.3187157766649506, + "loss": 1.4387317895889282, + "loss_ce": 0.0031849045772105455, + "loss_iou": 0.64453125, + "loss_num": 0.0289306640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 225515332, + "step": 3405 + }, + { + "epoch": 0.31880937894884637, + "grad_norm": 15.956134796142578, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 225582580, + "step": 3406 + }, + { + "epoch": 0.31880937894884637, + "loss": 1.289783000946045, + "loss_ce": 0.003161872271448374, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 225582580, + "step": 3406 + }, + { + "epoch": 0.3189029812327421, + "grad_norm": 27.982378005981445, + "learning_rate": 5e-05, + "loss": 1.2075, + "num_input_tokens_seen": 225649324, + "step": 3407 + }, + { + "epoch": 0.3189029812327421, + "loss": 1.2502256631851196, + "loss_ce": 0.0055967336520552635, + "loss_iou": 0.48828125, + "loss_num": 0.05419921875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 225649324, + "step": 3407 + }, + { + "epoch": 0.3189965835166378, + "grad_norm": 25.254352569580078, + "learning_rate": 5e-05, + "loss": 1.5822, + "num_input_tokens_seen": 225715472, + "step": 3408 + }, + { + "epoch": 0.3189965835166378, + "loss": 1.3646240234375, + "loss_ce": 0.008178685791790485, + "loss_iou": 0.52734375, + "loss_num": 0.060546875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 225715472, + "step": 3408 + }, + { + "epoch": 0.31909018580053355, + "grad_norm": 29.55914878845215, + "learning_rate": 5e-05, + "loss": 1.3099, + "num_input_tokens_seen": 225781784, + "step": 3409 + }, + { + "epoch": 0.31909018580053355, + "loss": 1.1618421077728271, + "loss_ce": 0.004127358552068472, + "loss_iou": 0.4765625, + "loss_num": 0.041015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 225781784, + "step": 3409 + }, + { + "epoch": 0.31918378808442927, + "grad_norm": 105.87574768066406, + "learning_rate": 5e-05, + "loss": 1.4326, + "num_input_tokens_seen": 225848616, + "step": 3410 + }, + { + "epoch": 0.31918378808442927, + "loss": 1.558828353881836, + "loss_ce": 0.010000256821513176, + "loss_iou": 0.6328125, + "loss_num": 0.056884765625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 225848616, + "step": 3410 + }, + { + "epoch": 0.319277390368325, + "grad_norm": 41.374290466308594, + "learning_rate": 5e-05, + "loss": 1.4072, + "num_input_tokens_seen": 225915488, + "step": 3411 + }, + { + "epoch": 0.319277390368325, + "loss": 1.2789338827133179, + "loss_ce": 0.0057404618710279465, + "loss_iou": 0.5, + "loss_num": 0.053955078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 225915488, + "step": 3411 + }, + { + "epoch": 0.3193709926522207, + "grad_norm": 83.49828338623047, + "learning_rate": 5e-05, + "loss": 1.2053, + "num_input_tokens_seen": 225981768, + "step": 3412 + }, + { + "epoch": 0.3193709926522207, + "loss": 1.3126630783081055, + "loss_ce": 0.0050458889454603195, + "loss_iou": 0.515625, + "loss_num": 0.0556640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 225981768, + "step": 3412 + }, + { + "epoch": 0.31946459493611645, + "grad_norm": 13.549079895019531, + "learning_rate": 5e-05, + "loss": 1.1658, + "num_input_tokens_seen": 226048652, + "step": 3413 + }, + { + "epoch": 0.31946459493611645, + "loss": 1.2378922700881958, + "loss_ce": 0.005958714056760073, + "loss_iou": 0.490234375, + "loss_num": 0.05029296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 226048652, + "step": 3413 + }, + { + "epoch": 0.31955819722001216, + "grad_norm": 36.342227935791016, + "learning_rate": 5e-05, + "loss": 1.499, + "num_input_tokens_seen": 226114952, + "step": 3414 + }, + { + "epoch": 0.31955819722001216, + "loss": 1.4749104976654053, + "loss_ce": 0.004207334015518427, + "loss_iou": 0.578125, + "loss_num": 0.062255859375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 226114952, + "step": 3414 + }, + { + "epoch": 0.3196517995039079, + "grad_norm": 13.046402931213379, + "learning_rate": 5e-05, + "loss": 1.1797, + "num_input_tokens_seen": 226181568, + "step": 3415 + }, + { + "epoch": 0.3196517995039079, + "loss": 1.1467552185058594, + "loss_ce": 0.005397803150117397, + "loss_iou": 0.5, + "loss_num": 0.0283203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 226181568, + "step": 3415 + }, + { + "epoch": 0.31974540178780364, + "grad_norm": 42.7341423034668, + "learning_rate": 5e-05, + "loss": 1.2816, + "num_input_tokens_seen": 226247452, + "step": 3416 + }, + { + "epoch": 0.31974540178780364, + "loss": 1.2671403884887695, + "loss_ce": 0.01176926214247942, + "loss_iou": 0.4921875, + "loss_num": 0.05419921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 226247452, + "step": 3416 + }, + { + "epoch": 0.31983900407169935, + "grad_norm": 19.674161911010742, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 226314116, + "step": 3417 + }, + { + "epoch": 0.31983900407169935, + "loss": 1.302283763885498, + "loss_ce": 0.003943867515772581, + "loss_iou": 0.5078125, + "loss_num": 0.057373046875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 226314116, + "step": 3417 + }, + { + "epoch": 0.31993260635559506, + "grad_norm": 32.49250411987305, + "learning_rate": 5e-05, + "loss": 1.2044, + "num_input_tokens_seen": 226380248, + "step": 3418 + }, + { + "epoch": 0.31993260635559506, + "loss": 1.338599443435669, + "loss_ce": 0.010474497452378273, + "loss_iou": 0.55859375, + "loss_num": 0.042236328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 226380248, + "step": 3418 + }, + { + "epoch": 0.32002620863949083, + "grad_norm": 20.575708389282227, + "learning_rate": 5e-05, + "loss": 1.2589, + "num_input_tokens_seen": 226447452, + "step": 3419 + }, + { + "epoch": 0.32002620863949083, + "loss": 1.138270616531372, + "loss_ce": 0.0033829244785010815, + "loss_iou": 0.47265625, + "loss_num": 0.0380859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 226447452, + "step": 3419 + }, + { + "epoch": 0.32011981092338654, + "grad_norm": 16.22533416748047, + "learning_rate": 5e-05, + "loss": 1.2253, + "num_input_tokens_seen": 226513744, + "step": 3420 + }, + { + "epoch": 0.32011981092338654, + "loss": 1.3674187660217285, + "loss_ce": 0.003160955850034952, + "loss_iou": 0.5625, + "loss_num": 0.04833984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 226513744, + "step": 3420 + }, + { + "epoch": 0.32021341320728225, + "grad_norm": 24.764902114868164, + "learning_rate": 5e-05, + "loss": 1.2549, + "num_input_tokens_seen": 226581080, + "step": 3421 + }, + { + "epoch": 0.32021341320728225, + "loss": 1.2663476467132568, + "loss_ce": 0.0046288808807730675, + "loss_iou": 0.484375, + "loss_num": 0.058837890625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 226581080, + "step": 3421 + }, + { + "epoch": 0.32030701549117796, + "grad_norm": 41.2261962890625, + "learning_rate": 5e-05, + "loss": 1.5733, + "num_input_tokens_seen": 226647828, + "step": 3422 + }, + { + "epoch": 0.32030701549117796, + "loss": 1.6071685552597046, + "loss_ce": 0.008535739034414291, + "loss_iou": 0.66796875, + "loss_num": 0.052490234375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 226647828, + "step": 3422 + }, + { + "epoch": 0.3204006177750737, + "grad_norm": 23.013776779174805, + "learning_rate": 5e-05, + "loss": 1.5592, + "num_input_tokens_seen": 226714868, + "step": 3423 + }, + { + "epoch": 0.3204006177750737, + "loss": 1.6581511497497559, + "loss_ce": 0.005319178104400635, + "loss_iou": 0.6875, + "loss_num": 0.056396484375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 226714868, + "step": 3423 + }, + { + "epoch": 0.32049422005896944, + "grad_norm": 17.5068359375, + "learning_rate": 5e-05, + "loss": 0.9892, + "num_input_tokens_seen": 226780336, + "step": 3424 + }, + { + "epoch": 0.32049422005896944, + "loss": 0.9788135290145874, + "loss_ce": 0.00542483339086175, + "loss_iou": 0.390625, + "loss_num": 0.03857421875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 226780336, + "step": 3424 + }, + { + "epoch": 0.32058782234286515, + "grad_norm": 22.214794158935547, + "learning_rate": 5e-05, + "loss": 1.282, + "num_input_tokens_seen": 226846332, + "step": 3425 + }, + { + "epoch": 0.32058782234286515, + "loss": 1.4596538543701172, + "loss_ce": 0.007505323737859726, + "loss_iou": 0.57421875, + "loss_num": 0.060546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 226846332, + "step": 3425 + }, + { + "epoch": 0.3206814246267609, + "grad_norm": 23.288240432739258, + "learning_rate": 5e-05, + "loss": 1.4074, + "num_input_tokens_seen": 226912540, + "step": 3426 + }, + { + "epoch": 0.3206814246267609, + "loss": 1.1024901866912842, + "loss_ce": 0.0038573648780584335, + "loss_iou": 0.482421875, + "loss_num": 0.027099609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 226912540, + "step": 3426 + }, + { + "epoch": 0.3207750269106566, + "grad_norm": 28.18408966064453, + "learning_rate": 5e-05, + "loss": 1.3961, + "num_input_tokens_seen": 226979152, + "step": 3427 + }, + { + "epoch": 0.3207750269106566, + "loss": 1.4781715869903564, + "loss_ce": 0.006369719281792641, + "loss_iou": 0.57421875, + "loss_num": 0.06494140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 226979152, + "step": 3427 + }, + { + "epoch": 0.32086862919455234, + "grad_norm": 24.60514259338379, + "learning_rate": 5e-05, + "loss": 1.099, + "num_input_tokens_seen": 227044600, + "step": 3428 + }, + { + "epoch": 0.32086862919455234, + "loss": 1.233229637145996, + "loss_ce": 0.005690678488463163, + "loss_iou": 0.5390625, + "loss_num": 0.0291748046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 227044600, + "step": 3428 + }, + { + "epoch": 0.3209622314784481, + "grad_norm": 21.425580978393555, + "learning_rate": 5e-05, + "loss": 1.3, + "num_input_tokens_seen": 227110888, + "step": 3429 + }, + { + "epoch": 0.3209622314784481, + "loss": 1.3242912292480469, + "loss_ce": 0.0044670719653368, + "loss_iou": 0.51953125, + "loss_num": 0.056640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 227110888, + "step": 3429 + }, + { + "epoch": 0.3210558337623438, + "grad_norm": 27.703676223754883, + "learning_rate": 5e-05, + "loss": 1.2957, + "num_input_tokens_seen": 227176600, + "step": 3430 + }, + { + "epoch": 0.3210558337623438, + "loss": 1.097275972366333, + "loss_ce": 0.0035258883144706488, + "loss_iou": 0.486328125, + "loss_num": 0.0240478515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 227176600, + "step": 3430 + }, + { + "epoch": 0.3211494360462395, + "grad_norm": 37.83911895751953, + "learning_rate": 5e-05, + "loss": 1.3346, + "num_input_tokens_seen": 227242196, + "step": 3431 + }, + { + "epoch": 0.3211494360462395, + "loss": 1.5346314907073975, + "loss_ce": 0.008264346979558468, + "loss_iou": 0.578125, + "loss_num": 0.07421875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 227242196, + "step": 3431 + }, + { + "epoch": 0.32124303833013523, + "grad_norm": 37.454063415527344, + "learning_rate": 5e-05, + "loss": 1.413, + "num_input_tokens_seen": 227309504, + "step": 3432 + }, + { + "epoch": 0.32124303833013523, + "loss": 1.4191879034042358, + "loss_ce": 0.005125434137880802, + "loss_iou": 0.6015625, + "loss_num": 0.0419921875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 227309504, + "step": 3432 + }, + { + "epoch": 0.321336640614031, + "grad_norm": 25.266143798828125, + "learning_rate": 5e-05, + "loss": 1.3904, + "num_input_tokens_seen": 227375672, + "step": 3433 + }, + { + "epoch": 0.321336640614031, + "loss": 1.439277172088623, + "loss_ce": 0.004218521527945995, + "loss_iou": 0.57421875, + "loss_num": 0.0576171875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 227375672, + "step": 3433 + }, + { + "epoch": 0.3214302428979267, + "grad_norm": 33.49772644042969, + "learning_rate": 5e-05, + "loss": 1.4032, + "num_input_tokens_seen": 227441280, + "step": 3434 + }, + { + "epoch": 0.3214302428979267, + "loss": 1.3680124282836914, + "loss_ce": 0.012055323459208012, + "loss_iou": 0.5703125, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 227441280, + "step": 3434 + }, + { + "epoch": 0.3215238451818224, + "grad_norm": 39.83012771606445, + "learning_rate": 5e-05, + "loss": 1.585, + "num_input_tokens_seen": 227507384, + "step": 3435 + }, + { + "epoch": 0.3215238451818224, + "loss": 1.7486791610717773, + "loss_ce": 0.004538507200777531, + "loss_iou": 0.671875, + "loss_num": 0.08056640625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 227507384, + "step": 3435 + }, + { + "epoch": 0.3216174474657182, + "grad_norm": 108.656005859375, + "learning_rate": 5e-05, + "loss": 1.8334, + "num_input_tokens_seen": 227574156, + "step": 3436 + }, + { + "epoch": 0.3216174474657182, + "loss": 1.5953823328018188, + "loss_ce": 0.004073723219335079, + "loss_iou": 0.7109375, + "loss_num": 0.033935546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 227574156, + "step": 3436 + }, + { + "epoch": 0.3217110497496139, + "grad_norm": 32.74374008178711, + "learning_rate": 5e-05, + "loss": 1.408, + "num_input_tokens_seen": 227640060, + "step": 3437 + }, + { + "epoch": 0.3217110497496139, + "loss": 1.5385544300079346, + "loss_ce": 0.006327945739030838, + "loss_iou": 0.6015625, + "loss_num": 0.06494140625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 227640060, + "step": 3437 + }, + { + "epoch": 0.3218046520335096, + "grad_norm": 22.27479362487793, + "learning_rate": 5e-05, + "loss": 1.311, + "num_input_tokens_seen": 227704916, + "step": 3438 + }, + { + "epoch": 0.3218046520335096, + "loss": 1.1508591175079346, + "loss_ce": 0.006083630956709385, + "loss_iou": 0.4765625, + "loss_num": 0.038818359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 227704916, + "step": 3438 + }, + { + "epoch": 0.3218982543174053, + "grad_norm": 163.22607421875, + "learning_rate": 5e-05, + "loss": 1.4206, + "num_input_tokens_seen": 227771108, + "step": 3439 + }, + { + "epoch": 0.3218982543174053, + "loss": 1.1678868532180786, + "loss_ce": 0.006754089146852493, + "loss_iou": 0.4296875, + "loss_num": 0.060546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 227771108, + "step": 3439 + }, + { + "epoch": 0.3219918566013011, + "grad_norm": 24.382034301757812, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 227837164, + "step": 3440 + }, + { + "epoch": 0.3219918566013011, + "loss": 1.493950366973877, + "loss_ce": 0.002373296767473221, + "loss_iou": 0.5625, + "loss_num": 0.0732421875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 227837164, + "step": 3440 + }, + { + "epoch": 0.3220854588851968, + "grad_norm": 20.726337432861328, + "learning_rate": 5e-05, + "loss": 1.4626, + "num_input_tokens_seen": 227903688, + "step": 3441 + }, + { + "epoch": 0.3220854588851968, + "loss": 1.488643765449524, + "loss_ce": 0.004757056478410959, + "loss_iou": 0.59375, + "loss_num": 0.059814453125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 227903688, + "step": 3441 + }, + { + "epoch": 0.3221790611690925, + "grad_norm": 22.031131744384766, + "learning_rate": 5e-05, + "loss": 1.703, + "num_input_tokens_seen": 227970208, + "step": 3442 + }, + { + "epoch": 0.3221790611690925, + "loss": 1.785135269165039, + "loss_ce": 0.005838434211909771, + "loss_iou": 0.6484375, + "loss_num": 0.0966796875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 227970208, + "step": 3442 + }, + { + "epoch": 0.3222726634529883, + "grad_norm": 20.484830856323242, + "learning_rate": 5e-05, + "loss": 1.5101, + "num_input_tokens_seen": 228036752, + "step": 3443 + }, + { + "epoch": 0.3222726634529883, + "loss": 1.3310368061065674, + "loss_ce": 0.0019352753879502416, + "loss_iou": 0.515625, + "loss_num": 0.05908203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 228036752, + "step": 3443 + }, + { + "epoch": 0.322366265736884, + "grad_norm": 23.667524337768555, + "learning_rate": 5e-05, + "loss": 1.2262, + "num_input_tokens_seen": 228103348, + "step": 3444 + }, + { + "epoch": 0.322366265736884, + "loss": 1.22589111328125, + "loss_ce": 0.005676334723830223, + "loss_iou": 0.50390625, + "loss_num": 0.0419921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 228103348, + "step": 3444 + }, + { + "epoch": 0.3224598680207797, + "grad_norm": 23.98842430114746, + "learning_rate": 5e-05, + "loss": 1.4692, + "num_input_tokens_seen": 228168600, + "step": 3445 + }, + { + "epoch": 0.3224598680207797, + "loss": 1.5688738822937012, + "loss_ce": 0.008815182372927666, + "loss_iou": 0.6015625, + "loss_num": 0.0712890625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 228168600, + "step": 3445 + }, + { + "epoch": 0.32255347030467546, + "grad_norm": 31.966474533081055, + "learning_rate": 5e-05, + "loss": 1.4171, + "num_input_tokens_seen": 228234136, + "step": 3446 + }, + { + "epoch": 0.32255347030467546, + "loss": 1.3432668447494507, + "loss_ce": 0.008305962197482586, + "loss_iou": 0.57421875, + "loss_num": 0.037841796875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 228234136, + "step": 3446 + }, + { + "epoch": 0.32264707258857117, + "grad_norm": 25.92433738708496, + "learning_rate": 5e-05, + "loss": 1.6404, + "num_input_tokens_seen": 228300360, + "step": 3447 + }, + { + "epoch": 0.32264707258857117, + "loss": 1.4639012813568115, + "loss_ce": 0.008823145180940628, + "loss_iou": 0.6171875, + "loss_num": 0.044189453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 228300360, + "step": 3447 + }, + { + "epoch": 0.3227406748724669, + "grad_norm": 24.386028289794922, + "learning_rate": 5e-05, + "loss": 1.1537, + "num_input_tokens_seen": 228367160, + "step": 3448 + }, + { + "epoch": 0.3227406748724669, + "loss": 1.1790634393692017, + "loss_ce": 0.00523528503254056, + "loss_iou": 0.482421875, + "loss_num": 0.041748046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 228367160, + "step": 3448 + }, + { + "epoch": 0.3228342771563626, + "grad_norm": 36.26959228515625, + "learning_rate": 5e-05, + "loss": 1.2385, + "num_input_tokens_seen": 228434172, + "step": 3449 + }, + { + "epoch": 0.3228342771563626, + "loss": 1.2111108303070068, + "loss_ce": 0.007497443817555904, + "loss_iou": 0.48046875, + "loss_num": 0.048583984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 228434172, + "step": 3449 + }, + { + "epoch": 0.32292787944025836, + "grad_norm": 24.693557739257812, + "learning_rate": 5e-05, + "loss": 1.3832, + "num_input_tokens_seen": 228501244, + "step": 3450 + }, + { + "epoch": 0.32292787944025836, + "loss": 1.470740556716919, + "loss_ce": 0.001990544144064188, + "loss_iou": 0.5546875, + "loss_num": 0.07275390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 228501244, + "step": 3450 + }, + { + "epoch": 0.32302148172415407, + "grad_norm": 20.144039154052734, + "learning_rate": 5e-05, + "loss": 1.4975, + "num_input_tokens_seen": 228566904, + "step": 3451 + }, + { + "epoch": 0.32302148172415407, + "loss": 1.4781911373138428, + "loss_ce": 0.010417640209197998, + "loss_iou": 0.56640625, + "loss_num": 0.06689453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 228566904, + "step": 3451 + }, + { + "epoch": 0.3231150840080498, + "grad_norm": 22.918781280517578, + "learning_rate": 5e-05, + "loss": 1.4302, + "num_input_tokens_seen": 228631652, + "step": 3452 + }, + { + "epoch": 0.3231150840080498, + "loss": 1.281588077545166, + "loss_ce": 0.010103719308972359, + "loss_iou": 0.44921875, + "loss_num": 0.07470703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 228631652, + "step": 3452 + }, + { + "epoch": 0.32320868629194555, + "grad_norm": 32.87624740600586, + "learning_rate": 5e-05, + "loss": 1.4831, + "num_input_tokens_seen": 228698160, + "step": 3453 + }, + { + "epoch": 0.32320868629194555, + "loss": 1.4957568645477295, + "loss_ce": 0.005522478371858597, + "loss_iou": 0.65625, + "loss_num": 0.034912109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 228698160, + "step": 3453 + }, + { + "epoch": 0.32330228857584126, + "grad_norm": 27.107770919799805, + "learning_rate": 5e-05, + "loss": 1.5809, + "num_input_tokens_seen": 228765000, + "step": 3454 + }, + { + "epoch": 0.32330228857584126, + "loss": 1.6188907623291016, + "loss_ce": 0.003656448097899556, + "loss_iou": 0.640625, + "loss_num": 0.06689453125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 228765000, + "step": 3454 + }, + { + "epoch": 0.32339589085973697, + "grad_norm": 44.22768783569336, + "learning_rate": 5e-05, + "loss": 1.3453, + "num_input_tokens_seen": 228831448, + "step": 3455 + }, + { + "epoch": 0.32339589085973697, + "loss": 1.2090684175491333, + "loss_ce": 0.0020371791906654835, + "loss_iou": 0.4765625, + "loss_num": 0.050537109375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 228831448, + "step": 3455 + }, + { + "epoch": 0.3234894931436327, + "grad_norm": 27.273752212524414, + "learning_rate": 5e-05, + "loss": 1.4877, + "num_input_tokens_seen": 228898152, + "step": 3456 + }, + { + "epoch": 0.3234894931436327, + "loss": 1.4453917741775513, + "loss_ce": 0.007891766726970673, + "loss_iou": 0.60546875, + "loss_num": 0.046142578125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 228898152, + "step": 3456 + }, + { + "epoch": 0.32358309542752844, + "grad_norm": 21.556808471679688, + "learning_rate": 5e-05, + "loss": 1.3753, + "num_input_tokens_seen": 228964848, + "step": 3457 + }, + { + "epoch": 0.32358309542752844, + "loss": 1.4642695188522339, + "loss_ce": 0.008214849047362804, + "loss_iou": 0.58984375, + "loss_num": 0.0546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 228964848, + "step": 3457 + }, + { + "epoch": 0.32367669771142416, + "grad_norm": 19.960359573364258, + "learning_rate": 5e-05, + "loss": 1.3073, + "num_input_tokens_seen": 229031824, + "step": 3458 + }, + { + "epoch": 0.32367669771142416, + "loss": 1.2399299144744873, + "loss_ce": 0.004090023692697287, + "loss_iou": 0.53515625, + "loss_num": 0.03271484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 229031824, + "step": 3458 + }, + { + "epoch": 0.32377029999531987, + "grad_norm": 18.274446487426758, + "learning_rate": 5e-05, + "loss": 1.3985, + "num_input_tokens_seen": 229098888, + "step": 3459 + }, + { + "epoch": 0.32377029999531987, + "loss": 1.463757038116455, + "loss_ce": 0.0033078021369874477, + "loss_iou": 0.60546875, + "loss_num": 0.049560546875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 229098888, + "step": 3459 + }, + { + "epoch": 0.32386390227921563, + "grad_norm": 26.443449020385742, + "learning_rate": 5e-05, + "loss": 1.2965, + "num_input_tokens_seen": 229164808, + "step": 3460 + }, + { + "epoch": 0.32386390227921563, + "loss": 1.220699667930603, + "loss_ce": 0.0039027612656354904, + "loss_iou": 0.5234375, + "loss_num": 0.033203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 229164808, + "step": 3460 + }, + { + "epoch": 0.32395750456311134, + "grad_norm": 26.293176651000977, + "learning_rate": 5e-05, + "loss": 1.5823, + "num_input_tokens_seen": 229230496, + "step": 3461 + }, + { + "epoch": 0.32395750456311134, + "loss": 1.5072904825210571, + "loss_ce": 0.009243616834282875, + "loss_iou": 0.59375, + "loss_num": 0.0615234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 229230496, + "step": 3461 + }, + { + "epoch": 0.32405110684700705, + "grad_norm": 39.32388687133789, + "learning_rate": 5e-05, + "loss": 1.4236, + "num_input_tokens_seen": 229296584, + "step": 3462 + }, + { + "epoch": 0.32405110684700705, + "loss": 1.4249300956726074, + "loss_ce": 0.00500829890370369, + "loss_iou": 0.55859375, + "loss_num": 0.060302734375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 229296584, + "step": 3462 + }, + { + "epoch": 0.3241447091309028, + "grad_norm": 24.357038497924805, + "learning_rate": 5e-05, + "loss": 1.5255, + "num_input_tokens_seen": 229363504, + "step": 3463 + }, + { + "epoch": 0.3241447091309028, + "loss": 1.4392220973968506, + "loss_ce": 0.008069687522947788, + "loss_iou": 0.5625, + "loss_num": 0.060791015625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 229363504, + "step": 3463 + }, + { + "epoch": 0.32423831141479853, + "grad_norm": 35.12007522583008, + "learning_rate": 5e-05, + "loss": 1.4583, + "num_input_tokens_seen": 229429140, + "step": 3464 + }, + { + "epoch": 0.32423831141479853, + "loss": 1.6169214248657227, + "loss_ce": 0.0048608784563839436, + "loss_iou": 0.6015625, + "loss_num": 0.08203125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 229429140, + "step": 3464 + }, + { + "epoch": 0.32433191369869424, + "grad_norm": 21.183223724365234, + "learning_rate": 5e-05, + "loss": 1.1281, + "num_input_tokens_seen": 229495512, + "step": 3465 + }, + { + "epoch": 0.32433191369869424, + "loss": 1.2203271389007568, + "loss_ce": 0.004262746311724186, + "loss_iou": 0.462890625, + "loss_num": 0.05810546875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 229495512, + "step": 3465 + }, + { + "epoch": 0.32442551598258995, + "grad_norm": 20.533761978149414, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 229562096, + "step": 3466 + }, + { + "epoch": 0.32442551598258995, + "loss": 1.3959980010986328, + "loss_ce": 0.006349561735987663, + "loss_iou": 0.58984375, + "loss_num": 0.0419921875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 229562096, + "step": 3466 + }, + { + "epoch": 0.3245191182664857, + "grad_norm": 20.71112632751465, + "learning_rate": 5e-05, + "loss": 1.1819, + "num_input_tokens_seen": 229628540, + "step": 3467 + }, + { + "epoch": 0.3245191182664857, + "loss": 0.9793422818183899, + "loss_ce": 0.0071743205189704895, + "loss_iou": 0.42578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 229628540, + "step": 3467 + }, + { + "epoch": 0.32461272055038143, + "grad_norm": 25.044464111328125, + "learning_rate": 5e-05, + "loss": 1.1751, + "num_input_tokens_seen": 229693956, + "step": 3468 + }, + { + "epoch": 0.32461272055038143, + "loss": 1.39524245262146, + "loss_ce": 0.003640956711024046, + "loss_iou": 0.61328125, + "loss_num": 0.03271484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 229693956, + "step": 3468 + }, + { + "epoch": 0.32470632283427714, + "grad_norm": 19.4493408203125, + "learning_rate": 5e-05, + "loss": 1.2971, + "num_input_tokens_seen": 229759644, + "step": 3469 + }, + { + "epoch": 0.32470632283427714, + "loss": 1.3415346145629883, + "loss_ce": 0.006085404194891453, + "loss_iou": 0.5234375, + "loss_num": 0.058349609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 229759644, + "step": 3469 + }, + { + "epoch": 0.3247999251181729, + "grad_norm": 23.106149673461914, + "learning_rate": 5e-05, + "loss": 1.2264, + "num_input_tokens_seen": 229826152, + "step": 3470 + }, + { + "epoch": 0.3247999251181729, + "loss": 1.2581514120101929, + "loss_ce": 0.009127913974225521, + "loss_iou": 0.494140625, + "loss_num": 0.052001953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 229826152, + "step": 3470 + }, + { + "epoch": 0.3248935274020686, + "grad_norm": 25.765979766845703, + "learning_rate": 5e-05, + "loss": 1.503, + "num_input_tokens_seen": 229891876, + "step": 3471 + }, + { + "epoch": 0.3248935274020686, + "loss": 1.4254307746887207, + "loss_ce": 0.0030675381422042847, + "loss_iou": 0.625, + "loss_num": 0.03515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 229891876, + "step": 3471 + }, + { + "epoch": 0.3249871296859643, + "grad_norm": 35.62071990966797, + "learning_rate": 5e-05, + "loss": 1.4101, + "num_input_tokens_seen": 229958552, + "step": 3472 + }, + { + "epoch": 0.3249871296859643, + "loss": 1.3482264280319214, + "loss_ce": 0.008382691070437431, + "loss_iou": 0.5390625, + "loss_num": 0.05224609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 229958552, + "step": 3472 + }, + { + "epoch": 0.32508073196986004, + "grad_norm": 23.053340911865234, + "learning_rate": 5e-05, + "loss": 1.4256, + "num_input_tokens_seen": 230024216, + "step": 3473 + }, + { + "epoch": 0.32508073196986004, + "loss": 1.2874468564987183, + "loss_ce": 0.005708581767976284, + "loss_iou": 0.5078125, + "loss_num": 0.05322265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 230024216, + "step": 3473 + }, + { + "epoch": 0.3251743342537558, + "grad_norm": 29.020132064819336, + "learning_rate": 5e-05, + "loss": 1.2004, + "num_input_tokens_seen": 230090000, + "step": 3474 + }, + { + "epoch": 0.3251743342537558, + "loss": 1.4137232303619385, + "loss_ce": 0.00356693915091455, + "loss_iou": 0.578125, + "loss_num": 0.050048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 230090000, + "step": 3474 + }, + { + "epoch": 0.3252679365376515, + "grad_norm": 27.151784896850586, + "learning_rate": 5e-05, + "loss": 1.2197, + "num_input_tokens_seen": 230157512, + "step": 3475 + }, + { + "epoch": 0.3252679365376515, + "loss": 1.3149802684783936, + "loss_ce": 0.006386419292539358, + "loss_iou": 0.53125, + "loss_num": 0.04931640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 230157512, + "step": 3475 + }, + { + "epoch": 0.3253615388215472, + "grad_norm": 28.80777359008789, + "learning_rate": 5e-05, + "loss": 1.4261, + "num_input_tokens_seen": 230222736, + "step": 3476 + }, + { + "epoch": 0.3253615388215472, + "loss": 1.5021616220474243, + "loss_ce": 0.00435891468077898, + "loss_iou": 0.61328125, + "loss_num": 0.0546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 230222736, + "step": 3476 + }, + { + "epoch": 0.325455141105443, + "grad_norm": 30.501630783081055, + "learning_rate": 5e-05, + "loss": 1.4853, + "num_input_tokens_seen": 230289736, + "step": 3477 + }, + { + "epoch": 0.325455141105443, + "loss": 1.626631259918213, + "loss_ce": 0.006514023058116436, + "loss_iou": 0.625, + "loss_num": 0.0732421875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 230289736, + "step": 3477 + }, + { + "epoch": 0.3255487433893387, + "grad_norm": 27.62831687927246, + "learning_rate": 5e-05, + "loss": 1.4628, + "num_input_tokens_seen": 230357544, + "step": 3478 + }, + { + "epoch": 0.3255487433893387, + "loss": 1.2684128284454346, + "loss_ce": 0.004252570681273937, + "loss_iou": 0.5, + "loss_num": 0.052490234375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 230357544, + "step": 3478 + }, + { + "epoch": 0.3256423456732344, + "grad_norm": 17.582786560058594, + "learning_rate": 5e-05, + "loss": 1.2282, + "num_input_tokens_seen": 230424296, + "step": 3479 + }, + { + "epoch": 0.3256423456732344, + "loss": 1.0699564218521118, + "loss_ce": 0.006479851435869932, + "loss_iou": 0.421875, + "loss_num": 0.043701171875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 230424296, + "step": 3479 + }, + { + "epoch": 0.3257359479571302, + "grad_norm": 50.17025375366211, + "learning_rate": 5e-05, + "loss": 1.3563, + "num_input_tokens_seen": 230491124, + "step": 3480 + }, + { + "epoch": 0.3257359479571302, + "loss": 1.348001480102539, + "loss_ce": 0.008157771080732346, + "loss_iou": 0.5546875, + "loss_num": 0.046142578125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 230491124, + "step": 3480 + }, + { + "epoch": 0.3258295502410259, + "grad_norm": 28.259422302246094, + "learning_rate": 5e-05, + "loss": 1.1982, + "num_input_tokens_seen": 230557228, + "step": 3481 + }, + { + "epoch": 0.3258295502410259, + "loss": 1.3323099613189697, + "loss_ce": 0.005649839527904987, + "loss_iou": 0.515625, + "loss_num": 0.058837890625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 230557228, + "step": 3481 + }, + { + "epoch": 0.3259231525249216, + "grad_norm": 42.412147521972656, + "learning_rate": 5e-05, + "loss": 1.5952, + "num_input_tokens_seen": 230623592, + "step": 3482 + }, + { + "epoch": 0.3259231525249216, + "loss": 1.6786226034164429, + "loss_ce": 0.007724075112491846, + "loss_iou": 0.6640625, + "loss_num": 0.0693359375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 230623592, + "step": 3482 + }, + { + "epoch": 0.3260167548088173, + "grad_norm": 28.540069580078125, + "learning_rate": 5e-05, + "loss": 1.2564, + "num_input_tokens_seen": 230690916, + "step": 3483 + }, + { + "epoch": 0.3260167548088173, + "loss": 0.9094557166099548, + "loss_ce": 0.0036939966958016157, + "loss_iou": 0.40625, + "loss_num": 0.01904296875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 230690916, + "step": 3483 + }, + { + "epoch": 0.3261103570927131, + "grad_norm": 23.447139739990234, + "learning_rate": 5e-05, + "loss": 1.4811, + "num_input_tokens_seen": 230756944, + "step": 3484 + }, + { + "epoch": 0.3261103570927131, + "loss": 1.3724188804626465, + "loss_ce": 0.006207851227372885, + "loss_iou": 0.55078125, + "loss_num": 0.052978515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 230756944, + "step": 3484 + }, + { + "epoch": 0.3262039593766088, + "grad_norm": 22.987640380859375, + "learning_rate": 5e-05, + "loss": 1.5972, + "num_input_tokens_seen": 230822520, + "step": 3485 + }, + { + "epoch": 0.3262039593766088, + "loss": 1.3434700965881348, + "loss_ce": 0.007044360972940922, + "loss_iou": 0.5625, + "loss_num": 0.042236328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 230822520, + "step": 3485 + }, + { + "epoch": 0.3262975616605045, + "grad_norm": 29.072772979736328, + "learning_rate": 5e-05, + "loss": 1.4876, + "num_input_tokens_seen": 230889188, + "step": 3486 + }, + { + "epoch": 0.3262975616605045, + "loss": 1.4115326404571533, + "loss_ce": 0.009188849478960037, + "loss_iou": 0.58984375, + "loss_num": 0.043701171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 230889188, + "step": 3486 + }, + { + "epoch": 0.32639116394440026, + "grad_norm": 54.610877990722656, + "learning_rate": 5e-05, + "loss": 1.7372, + "num_input_tokens_seen": 230954712, + "step": 3487 + }, + { + "epoch": 0.32639116394440026, + "loss": 1.5750269889831543, + "loss_ce": 0.007644145283848047, + "loss_iou": 0.640625, + "loss_num": 0.05712890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 230954712, + "step": 3487 + }, + { + "epoch": 0.326484766228296, + "grad_norm": 25.548595428466797, + "learning_rate": 5e-05, + "loss": 1.6614, + "num_input_tokens_seen": 231021184, + "step": 3488 + }, + { + "epoch": 0.326484766228296, + "loss": 1.8577330112457275, + "loss_ce": 0.009100107476115227, + "loss_iou": 0.7578125, + "loss_num": 0.06640625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 231021184, + "step": 3488 + }, + { + "epoch": 0.3265783685121917, + "grad_norm": 22.18245506286621, + "learning_rate": 5e-05, + "loss": 1.257, + "num_input_tokens_seen": 231087584, + "step": 3489 + }, + { + "epoch": 0.3265783685121917, + "loss": 1.3807964324951172, + "loss_ce": 0.007749560289084911, + "loss_iou": 0.578125, + "loss_num": 0.044189453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 231087584, + "step": 3489 + }, + { + "epoch": 0.32667197079608745, + "grad_norm": 17.516324996948242, + "learning_rate": 5e-05, + "loss": 1.1775, + "num_input_tokens_seen": 231154032, + "step": 3490 + }, + { + "epoch": 0.32667197079608745, + "loss": 1.2251875400543213, + "loss_ce": 0.006651208270341158, + "loss_iou": 0.486328125, + "loss_num": 0.049072265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 231154032, + "step": 3490 + }, + { + "epoch": 0.32676557307998316, + "grad_norm": 23.85236167907715, + "learning_rate": 5e-05, + "loss": 1.3247, + "num_input_tokens_seen": 231221056, + "step": 3491 + }, + { + "epoch": 0.32676557307998316, + "loss": 1.2391154766082764, + "loss_ce": 0.005228670779615641, + "loss_iou": 0.55078125, + "loss_num": 0.0262451171875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 231221056, + "step": 3491 + }, + { + "epoch": 0.3268591753638789, + "grad_norm": 22.00735855102539, + "learning_rate": 5e-05, + "loss": 1.3966, + "num_input_tokens_seen": 231288024, + "step": 3492 + }, + { + "epoch": 0.3268591753638789, + "loss": 1.255366325378418, + "loss_ce": 0.006342868786305189, + "loss_iou": 0.54296875, + "loss_num": 0.031982421875, + "loss_xval": 1.25, + "num_input_tokens_seen": 231288024, + "step": 3492 + }, + { + "epoch": 0.3269527776477746, + "grad_norm": 52.37733840942383, + "learning_rate": 5e-05, + "loss": 1.508, + "num_input_tokens_seen": 231354548, + "step": 3493 + }, + { + "epoch": 0.3269527776477746, + "loss": 1.467725396156311, + "loss_ce": 0.008252738043665886, + "loss_iou": 0.5859375, + "loss_num": 0.056396484375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 231354548, + "step": 3493 + }, + { + "epoch": 0.32704637993167035, + "grad_norm": 28.013761520385742, + "learning_rate": 5e-05, + "loss": 1.4415, + "num_input_tokens_seen": 231421152, + "step": 3494 + }, + { + "epoch": 0.32704637993167035, + "loss": 1.332025408744812, + "loss_ce": 0.005365224555134773, + "loss_iou": 0.5859375, + "loss_num": 0.031005859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 231421152, + "step": 3494 + }, + { + "epoch": 0.32713998221556606, + "grad_norm": 26.436290740966797, + "learning_rate": 5e-05, + "loss": 1.2413, + "num_input_tokens_seen": 231487444, + "step": 3495 + }, + { + "epoch": 0.32713998221556606, + "loss": 1.2978804111480713, + "loss_ce": 0.006376506295055151, + "loss_iou": 0.54296875, + "loss_num": 0.041015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 231487444, + "step": 3495 + }, + { + "epoch": 0.32723358449946177, + "grad_norm": 19.611053466796875, + "learning_rate": 5e-05, + "loss": 1.2601, + "num_input_tokens_seen": 231553776, + "step": 3496 + }, + { + "epoch": 0.32723358449946177, + "loss": 1.4286245107650757, + "loss_ce": 0.005772912874817848, + "loss_iou": 0.56640625, + "loss_num": 0.058349609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 231553776, + "step": 3496 + }, + { + "epoch": 0.32732718678335754, + "grad_norm": 26.257793426513672, + "learning_rate": 5e-05, + "loss": 1.5374, + "num_input_tokens_seen": 231620348, + "step": 3497 + }, + { + "epoch": 0.32732718678335754, + "loss": 1.6531095504760742, + "loss_ce": 0.006625092122703791, + "loss_iou": 0.6328125, + "loss_num": 0.07666015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 231620348, + "step": 3497 + }, + { + "epoch": 0.32742078906725325, + "grad_norm": 40.22646713256836, + "learning_rate": 5e-05, + "loss": 1.4268, + "num_input_tokens_seen": 231686712, + "step": 3498 + }, + { + "epoch": 0.32742078906725325, + "loss": 1.428750991821289, + "loss_ce": 0.004922924097627401, + "loss_iou": 0.609375, + "loss_num": 0.04052734375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 231686712, + "step": 3498 + }, + { + "epoch": 0.32751439135114896, + "grad_norm": 26.511734008789062, + "learning_rate": 5e-05, + "loss": 1.2521, + "num_input_tokens_seen": 231753180, + "step": 3499 + }, + { + "epoch": 0.32751439135114896, + "loss": 1.258674144744873, + "loss_ce": 0.007697533816099167, + "loss_iou": 0.50390625, + "loss_num": 0.04833984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 231753180, + "step": 3499 + }, + { + "epoch": 0.32760799363504467, + "grad_norm": 34.28484344482422, + "learning_rate": 5e-05, + "loss": 1.5833, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "eval_seeclick_CIoU": 0.15782961249351501, + "eval_seeclick_GIoU": 0.16827442497015, + "eval_seeclick_IoU": 0.2891116961836815, + "eval_seeclick_MAE_all": 0.1535521298646927, + "eval_seeclick_MAE_h": 0.06641276739537716, + "eval_seeclick_MAE_w": 0.13540880382061005, + "eval_seeclick_MAE_x_boxes": 0.2435053288936615, + "eval_seeclick_MAE_y_boxes": 0.1485762670636177, + "eval_seeclick_NUM_probability": 0.9999370872974396, + "eval_seeclick_inside_bbox": 0.4364583343267441, + "eval_seeclick_loss": 2.501735210418701, + "eval_seeclick_loss_ce": 0.0145033891312778, + "eval_seeclick_loss_iou": 0.864501953125, + "eval_seeclick_loss_num": 0.15167999267578125, + "eval_seeclick_loss_xval": 2.48779296875, + "eval_seeclick_runtime": 65.6185, + "eval_seeclick_samples_per_second": 0.716, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "eval_icons_CIoU": -0.11122158542275429, + "eval_icons_GIoU": 0.01340615563094616, + "eval_icons_IoU": 0.09425394982099533, + "eval_icons_MAE_all": 0.20621279627084732, + "eval_icons_MAE_h": 0.22439535707235336, + "eval_icons_MAE_w": 0.19666418433189392, + "eval_icons_MAE_x_boxes": 0.13402969017624855, + "eval_icons_MAE_y_boxes": 0.08551926910877228, + "eval_icons_NUM_probability": 0.9997999966144562, + "eval_icons_inside_bbox": 0.1614583358168602, + "eval_icons_loss": 3.042537212371826, + "eval_icons_loss_ce": 3.3613167943258304e-05, + "eval_icons_loss_iou": 1.005615234375, + "eval_icons_loss_num": 0.22802734375, + "eval_icons_loss_xval": 3.15087890625, + "eval_icons_runtime": 73.5505, + "eval_icons_samples_per_second": 0.68, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "eval_screenspot_CIoU": -0.04845000927646955, + "eval_screenspot_GIoU": -0.01256128524740537, + "eval_screenspot_IoU": 0.14887459576129913, + "eval_screenspot_MAE_all": 0.22817763686180115, + "eval_screenspot_MAE_h": 0.179355318347613, + "eval_screenspot_MAE_w": 0.18963390588760376, + "eval_screenspot_MAE_x_boxes": 0.3209350109100342, + "eval_screenspot_MAE_y_boxes": 0.13483184576034546, + "eval_screenspot_NUM_probability": 0.9999322891235352, + "eval_screenspot_inside_bbox": 0.2970833381017049, + "eval_screenspot_loss": 3.1953461170196533, + "eval_screenspot_loss_ce": 0.0071808453649282455, + "eval_screenspot_loss_iou": 1.0323893229166667, + "eval_screenspot_loss_num": 0.23490397135416666, + "eval_screenspot_loss_xval": 3.2389322916666665, + "eval_screenspot_runtime": 122.7704, + "eval_screenspot_samples_per_second": 0.725, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "eval_compot_CIoU": -0.07480915263295174, + "eval_compot_GIoU": -0.026930993422865868, + "eval_compot_IoU": 0.11974719911813736, + "eval_compot_MAE_all": 0.2226223200559616, + "eval_compot_MAE_h": 0.19979272037744522, + "eval_compot_MAE_w": 0.22494399547576904, + "eval_compot_MAE_x_boxes": 0.19063614308834076, + "eval_compot_MAE_y_boxes": 0.1315009444952011, + "eval_compot_NUM_probability": 0.9999416470527649, + "eval_compot_inside_bbox": 0.2204861119389534, + "eval_compot_loss": 3.1513233184814453, + "eval_compot_loss_ce": 0.0035701930755749345, + "eval_compot_loss_iou": 1.043212890625, + "eval_compot_loss_num": 0.227264404296875, + "eval_compot_loss_xval": 3.22216796875, + "eval_compot_runtime": 67.5686, + "eval_compot_samples_per_second": 0.74, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "eval_custom_ui_MAE_all": 0.16447503119707108, + "eval_custom_ui_MAE_x": 0.16267096251249313, + "eval_custom_ui_MAE_y": 0.16627910733222961, + "eval_custom_ui_NUM_probability": 0.9999749958515167, + "eval_custom_ui_loss": 0.9318787455558777, + "eval_custom_ui_loss_ce": 0.18506206572055817, + "eval_custom_ui_loss_num": 0.15753173828125, + "eval_custom_ui_loss_xval": 0.787353515625, + "eval_custom_ui_runtime": 53.7817, + "eval_custom_ui_samples_per_second": 0.93, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32760799363504467, + "loss": 1.0177785158157349, + "loss_ce": 0.20918476581573486, + "loss_iou": 0.0, + "loss_num": 0.162109375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 231819164, + "step": 3500 + }, + { + "epoch": 0.32770159591894044, + "grad_norm": 20.540634155273438, + "learning_rate": 5e-05, + "loss": 1.1884, + "num_input_tokens_seen": 231885948, + "step": 3501 + }, + { + "epoch": 0.32770159591894044, + "loss": 1.1003904342651367, + "loss_ce": 0.007617032155394554, + "loss_iou": 0.470703125, + "loss_num": 0.03076171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 231885948, + "step": 3501 + }, + { + "epoch": 0.32779519820283615, + "grad_norm": 15.167670249938965, + "learning_rate": 5e-05, + "loss": 1.261, + "num_input_tokens_seen": 231952416, + "step": 3502 + }, + { + "epoch": 0.32779519820283615, + "loss": 0.9802899956703186, + "loss_ce": 0.004704044200479984, + "loss_iou": 0.416015625, + "loss_num": 0.02880859375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 231952416, + "step": 3502 + }, + { + "epoch": 0.32788880048673186, + "grad_norm": 19.73589324951172, + "learning_rate": 5e-05, + "loss": 1.2712, + "num_input_tokens_seen": 232017704, + "step": 3503 + }, + { + "epoch": 0.32788880048673186, + "loss": 1.255335807800293, + "loss_ce": 0.008265480399131775, + "loss_iou": 0.515625, + "loss_num": 0.043701171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 232017704, + "step": 3503 + }, + { + "epoch": 0.3279824027706276, + "grad_norm": 23.826526641845703, + "learning_rate": 5e-05, + "loss": 1.5358, + "num_input_tokens_seen": 232083420, + "step": 3504 + }, + { + "epoch": 0.3279824027706276, + "loss": 1.756319284439087, + "loss_ce": 0.011202055960893631, + "loss_iou": 0.703125, + "loss_num": 0.0673828125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 232083420, + "step": 3504 + }, + { + "epoch": 0.32807600505452333, + "grad_norm": 24.73110580444336, + "learning_rate": 5e-05, + "loss": 1.3299, + "num_input_tokens_seen": 232149428, + "step": 3505 + }, + { + "epoch": 0.32807600505452333, + "loss": 1.3241204023361206, + "loss_ce": 0.006737629882991314, + "loss_iou": 0.5546875, + "loss_num": 0.042724609375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 232149428, + "step": 3505 + }, + { + "epoch": 0.32816960733841904, + "grad_norm": 39.34742736816406, + "learning_rate": 5e-05, + "loss": 1.4139, + "num_input_tokens_seen": 232215984, + "step": 3506 + }, + { + "epoch": 0.32816960733841904, + "loss": 1.528343915939331, + "loss_ce": 0.006859532557427883, + "loss_iou": 0.609375, + "loss_num": 0.060546875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 232215984, + "step": 3506 + }, + { + "epoch": 0.3282632096223148, + "grad_norm": 79.795166015625, + "learning_rate": 5e-05, + "loss": 1.562, + "num_input_tokens_seen": 232282256, + "step": 3507 + }, + { + "epoch": 0.3282632096223148, + "loss": 1.648977518081665, + "loss_ce": 0.003469696966931224, + "loss_iou": 0.66015625, + "loss_num": 0.0654296875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 232282256, + "step": 3507 + }, + { + "epoch": 0.3283568119062105, + "grad_norm": 16.693782806396484, + "learning_rate": 5e-05, + "loss": 1.0874, + "num_input_tokens_seen": 232347496, + "step": 3508 + }, + { + "epoch": 0.3283568119062105, + "loss": 1.1710684299468994, + "loss_ce": 0.008226733654737473, + "loss_iou": 0.462890625, + "loss_num": 0.047607421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 232347496, + "step": 3508 + }, + { + "epoch": 0.32845041419010623, + "grad_norm": 17.354244232177734, + "learning_rate": 5e-05, + "loss": 1.3225, + "num_input_tokens_seen": 232413608, + "step": 3509 + }, + { + "epoch": 0.32845041419010623, + "loss": 1.2690069675445557, + "loss_ce": 0.011194521561264992, + "loss_iou": 0.484375, + "loss_num": 0.057861328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 232413608, + "step": 3509 + }, + { + "epoch": 0.32854401647400194, + "grad_norm": 29.071693420410156, + "learning_rate": 5e-05, + "loss": 1.3773, + "num_input_tokens_seen": 232479400, + "step": 3510 + }, + { + "epoch": 0.32854401647400194, + "loss": 1.4294288158416748, + "loss_ce": 0.008042161352932453, + "loss_iou": 0.59375, + "loss_num": 0.046875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 232479400, + "step": 3510 + }, + { + "epoch": 0.3286376187578977, + "grad_norm": 20.38787841796875, + "learning_rate": 5e-05, + "loss": 1.7461, + "num_input_tokens_seen": 232545720, + "step": 3511 + }, + { + "epoch": 0.3286376187578977, + "loss": 1.6558558940887451, + "loss_ce": 0.006441822741180658, + "loss_iou": 0.7109375, + "loss_num": 0.044677734375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 232545720, + "step": 3511 + }, + { + "epoch": 0.3287312210417934, + "grad_norm": 20.92414665222168, + "learning_rate": 5e-05, + "loss": 1.2691, + "num_input_tokens_seen": 232614272, + "step": 3512 + }, + { + "epoch": 0.3287312210417934, + "loss": 1.2079880237579346, + "loss_ce": 0.010234096087515354, + "loss_iou": 0.498046875, + "loss_num": 0.040283203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 232614272, + "step": 3512 + }, + { + "epoch": 0.32882482332568913, + "grad_norm": 20.686065673828125, + "learning_rate": 5e-05, + "loss": 1.4184, + "num_input_tokens_seen": 232680532, + "step": 3513 + }, + { + "epoch": 0.32882482332568913, + "loss": 1.3474245071411133, + "loss_ce": 0.0036745343822985888, + "loss_iou": 0.59375, + "loss_num": 0.03173828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 232680532, + "step": 3513 + }, + { + "epoch": 0.3289184256095849, + "grad_norm": 15.067450523376465, + "learning_rate": 5e-05, + "loss": 1.2326, + "num_input_tokens_seen": 232747440, + "step": 3514 + }, + { + "epoch": 0.3289184256095849, + "loss": 1.2044594287872314, + "loss_ce": 0.007682022638618946, + "loss_iou": 0.46875, + "loss_num": 0.052001953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 232747440, + "step": 3514 + }, + { + "epoch": 0.3290120278934806, + "grad_norm": 23.287527084350586, + "learning_rate": 5e-05, + "loss": 1.3241, + "num_input_tokens_seen": 232813848, + "step": 3515 + }, + { + "epoch": 0.3290120278934806, + "loss": 1.3500601053237915, + "loss_ce": 0.0033803777769207954, + "loss_iou": 0.53515625, + "loss_num": 0.0556640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 232813848, + "step": 3515 + }, + { + "epoch": 0.3291056301773763, + "grad_norm": 36.191043853759766, + "learning_rate": 5e-05, + "loss": 1.2991, + "num_input_tokens_seen": 232879480, + "step": 3516 + }, + { + "epoch": 0.3291056301773763, + "loss": 1.3669815063476562, + "loss_ce": 0.004676827695220709, + "loss_iou": 0.578125, + "loss_num": 0.041259765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 232879480, + "step": 3516 + }, + { + "epoch": 0.32919923246127203, + "grad_norm": 137.72544860839844, + "learning_rate": 5e-05, + "loss": 1.6931, + "num_input_tokens_seen": 232945264, + "step": 3517 + }, + { + "epoch": 0.32919923246127203, + "loss": 1.6234352588653564, + "loss_ce": 0.0024635731242597103, + "loss_iou": 0.609375, + "loss_num": 0.080078125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 232945264, + "step": 3517 + }, + { + "epoch": 0.3292928347451678, + "grad_norm": 58.17002487182617, + "learning_rate": 5e-05, + "loss": 1.3909, + "num_input_tokens_seen": 233011808, + "step": 3518 + }, + { + "epoch": 0.3292928347451678, + "loss": 1.3913087844848633, + "loss_ce": 0.00556666124612093, + "loss_iou": 0.59375, + "loss_num": 0.0400390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 233011808, + "step": 3518 + }, + { + "epoch": 0.3293864370290635, + "grad_norm": 46.489566802978516, + "learning_rate": 5e-05, + "loss": 1.4017, + "num_input_tokens_seen": 233078812, + "step": 3519 + }, + { + "epoch": 0.3293864370290635, + "loss": 1.4041200876235962, + "loss_ce": 0.004706018604338169, + "loss_iou": 0.578125, + "loss_num": 0.04833984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 233078812, + "step": 3519 + }, + { + "epoch": 0.3294800393129592, + "grad_norm": 23.331544876098633, + "learning_rate": 5e-05, + "loss": 1.5535, + "num_input_tokens_seen": 233145912, + "step": 3520 + }, + { + "epoch": 0.3294800393129592, + "loss": 1.7464112043380737, + "loss_ce": 0.007153346668928862, + "loss_iou": 0.734375, + "loss_num": 0.054443359375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 233145912, + "step": 3520 + }, + { + "epoch": 0.329573641596855, + "grad_norm": 25.761667251586914, + "learning_rate": 5e-05, + "loss": 1.1247, + "num_input_tokens_seen": 233212740, + "step": 3521 + }, + { + "epoch": 0.329573641596855, + "loss": 1.111678123474121, + "loss_ce": 0.003279757220298052, + "loss_iou": 0.44140625, + "loss_num": 0.044921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 233212740, + "step": 3521 + }, + { + "epoch": 0.3296672438807507, + "grad_norm": 32.18890380859375, + "learning_rate": 5e-05, + "loss": 1.3776, + "num_input_tokens_seen": 233279872, + "step": 3522 + }, + { + "epoch": 0.3296672438807507, + "loss": 1.3662781715393066, + "loss_ce": 0.006414836272597313, + "loss_iou": 0.5703125, + "loss_num": 0.044677734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 233279872, + "step": 3522 + }, + { + "epoch": 0.3297608461646464, + "grad_norm": 22.280868530273438, + "learning_rate": 5e-05, + "loss": 1.5736, + "num_input_tokens_seen": 233346380, + "step": 3523 + }, + { + "epoch": 0.3297608461646464, + "loss": 1.5078675746917725, + "loss_ce": 0.002984786406159401, + "loss_iou": 0.6484375, + "loss_num": 0.04150390625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 233346380, + "step": 3523 + }, + { + "epoch": 0.32985444844854217, + "grad_norm": 15.169958114624023, + "learning_rate": 5e-05, + "loss": 1.1784, + "num_input_tokens_seen": 233411988, + "step": 3524 + }, + { + "epoch": 0.32985444844854217, + "loss": 1.1074304580688477, + "loss_ce": 0.004647225607186556, + "loss_iou": 0.451171875, + "loss_num": 0.040283203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 233411988, + "step": 3524 + }, + { + "epoch": 0.3299480507324379, + "grad_norm": 39.23529052734375, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 233478972, + "step": 3525 + }, + { + "epoch": 0.3299480507324379, + "loss": 1.3963702917099, + "loss_ce": 0.008186709135770798, + "loss_iou": 0.51953125, + "loss_num": 0.06982421875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 233478972, + "step": 3525 + }, + { + "epoch": 0.3300416530163336, + "grad_norm": 26.459463119506836, + "learning_rate": 5e-05, + "loss": 1.2726, + "num_input_tokens_seen": 233545680, + "step": 3526 + }, + { + "epoch": 0.3300416530163336, + "loss": 1.198699951171875, + "loss_ce": 0.0053405482321977615, + "loss_iou": 0.5234375, + "loss_num": 0.0302734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 233545680, + "step": 3526 + }, + { + "epoch": 0.3301352553002293, + "grad_norm": 19.502931594848633, + "learning_rate": 5e-05, + "loss": 1.494, + "num_input_tokens_seen": 233611424, + "step": 3527 + }, + { + "epoch": 0.3301352553002293, + "loss": 1.7368769645690918, + "loss_ce": 0.007384879048913717, + "loss_iou": 0.6875, + "loss_num": 0.07080078125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 233611424, + "step": 3527 + }, + { + "epoch": 0.33022885758412507, + "grad_norm": 20.541210174560547, + "learning_rate": 5e-05, + "loss": 1.2868, + "num_input_tokens_seen": 233677900, + "step": 3528 + }, + { + "epoch": 0.33022885758412507, + "loss": 1.276645302772522, + "loss_ce": 0.0036960765719413757, + "loss_iou": 0.5625, + "loss_num": 0.02880859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 233677900, + "step": 3528 + }, + { + "epoch": 0.3303224598680208, + "grad_norm": 38.70900344848633, + "learning_rate": 5e-05, + "loss": 1.6072, + "num_input_tokens_seen": 233744480, + "step": 3529 + }, + { + "epoch": 0.3303224598680208, + "loss": 1.5182149410247803, + "loss_ce": 0.009914163500070572, + "loss_iou": 0.62109375, + "loss_num": 0.053466796875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 233744480, + "step": 3529 + }, + { + "epoch": 0.3304160621519165, + "grad_norm": 29.879817962646484, + "learning_rate": 5e-05, + "loss": 1.4821, + "num_input_tokens_seen": 233810300, + "step": 3530 + }, + { + "epoch": 0.3304160621519165, + "loss": 1.501889944076538, + "loss_ce": 0.003354812040925026, + "loss_iou": 0.640625, + "loss_num": 0.0439453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 233810300, + "step": 3530 + }, + { + "epoch": 0.33050966443581226, + "grad_norm": 17.80263328552246, + "learning_rate": 5e-05, + "loss": 1.3036, + "num_input_tokens_seen": 233876360, + "step": 3531 + }, + { + "epoch": 0.33050966443581226, + "loss": 1.0629225969314575, + "loss_ce": 0.004817101173102856, + "loss_iou": 0.408203125, + "loss_num": 0.04833984375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 233876360, + "step": 3531 + }, + { + "epoch": 0.33060326671970797, + "grad_norm": 39.266719818115234, + "learning_rate": 5e-05, + "loss": 1.3206, + "num_input_tokens_seen": 233943072, + "step": 3532 + }, + { + "epoch": 0.33060326671970797, + "loss": 1.2983803749084473, + "loss_ce": 0.007364715449512005, + "loss_iou": 0.5234375, + "loss_num": 0.049072265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 233943072, + "step": 3532 + }, + { + "epoch": 0.3306968690036037, + "grad_norm": 27.344268798828125, + "learning_rate": 5e-05, + "loss": 1.5397, + "num_input_tokens_seen": 234008400, + "step": 3533 + }, + { + "epoch": 0.3306968690036037, + "loss": 1.5955493450164795, + "loss_ce": 0.003264123573899269, + "loss_iou": 0.640625, + "loss_num": 0.06201171875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 234008400, + "step": 3533 + }, + { + "epoch": 0.3307904712874994, + "grad_norm": 24.001371383666992, + "learning_rate": 5e-05, + "loss": 1.4408, + "num_input_tokens_seen": 234074216, + "step": 3534 + }, + { + "epoch": 0.3307904712874994, + "loss": 1.3274438381195068, + "loss_ce": 0.004445748869329691, + "loss_iou": 0.5859375, + "loss_num": 0.0302734375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 234074216, + "step": 3534 + }, + { + "epoch": 0.33088407357139515, + "grad_norm": 18.622882843017578, + "learning_rate": 5e-05, + "loss": 1.3406, + "num_input_tokens_seen": 234140852, + "step": 3535 + }, + { + "epoch": 0.33088407357139515, + "loss": 1.2333338260650635, + "loss_ce": 0.0048182448372244835, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 234140852, + "step": 3535 + }, + { + "epoch": 0.33097767585529086, + "grad_norm": 25.497224807739258, + "learning_rate": 5e-05, + "loss": 1.1527, + "num_input_tokens_seen": 234206304, + "step": 3536 + }, + { + "epoch": 0.33097767585529086, + "loss": 1.274425983428955, + "loss_ce": 0.0024532973766326904, + "loss_iou": 0.56640625, + "loss_num": 0.02734375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 234206304, + "step": 3536 + }, + { + "epoch": 0.3310712781391866, + "grad_norm": 24.679630279541016, + "learning_rate": 5e-05, + "loss": 1.2641, + "num_input_tokens_seen": 234272740, + "step": 3537 + }, + { + "epoch": 0.3310712781391866, + "loss": 1.2440797090530396, + "loss_ce": 0.007995770312845707, + "loss_iou": 0.48046875, + "loss_num": 0.054931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 234272740, + "step": 3537 + }, + { + "epoch": 0.33116488042308234, + "grad_norm": 26.851369857788086, + "learning_rate": 5e-05, + "loss": 1.6072, + "num_input_tokens_seen": 234338928, + "step": 3538 + }, + { + "epoch": 0.33116488042308234, + "loss": 1.5216840505599976, + "loss_ce": 0.00996539369225502, + "loss_iou": 0.51953125, + "loss_num": 0.0947265625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 234338928, + "step": 3538 + }, + { + "epoch": 0.33125848270697805, + "grad_norm": 26.35730743408203, + "learning_rate": 5e-05, + "loss": 1.2772, + "num_input_tokens_seen": 234405568, + "step": 3539 + }, + { + "epoch": 0.33125848270697805, + "loss": 1.2963889837265015, + "loss_ce": 0.003908491227775812, + "loss_iou": 0.54296875, + "loss_num": 0.041259765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 234405568, + "step": 3539 + }, + { + "epoch": 0.33135208499087376, + "grad_norm": 28.880949020385742, + "learning_rate": 5e-05, + "loss": 1.3157, + "num_input_tokens_seen": 234472604, + "step": 3540 + }, + { + "epoch": 0.33135208499087376, + "loss": 1.4409079551696777, + "loss_ce": 0.004384455271065235, + "loss_iou": 0.59765625, + "loss_num": 0.048828125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 234472604, + "step": 3540 + }, + { + "epoch": 0.33144568727476953, + "grad_norm": 38.752498626708984, + "learning_rate": 5e-05, + "loss": 1.1753, + "num_input_tokens_seen": 234537884, + "step": 3541 + }, + { + "epoch": 0.33144568727476953, + "loss": 1.1052851676940918, + "loss_ce": 0.010558545589447021, + "loss_iou": 0.40625, + "loss_num": 0.05615234375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 234537884, + "step": 3541 + }, + { + "epoch": 0.33153928955866524, + "grad_norm": 14.769792556762695, + "learning_rate": 5e-05, + "loss": 1.1402, + "num_input_tokens_seen": 234603852, + "step": 3542 + }, + { + "epoch": 0.33153928955866524, + "loss": 1.3644130229949951, + "loss_ce": 0.0030848467722535133, + "loss_iou": 0.57421875, + "loss_num": 0.041748046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 234603852, + "step": 3542 + }, + { + "epoch": 0.33163289184256095, + "grad_norm": 20.41305923461914, + "learning_rate": 5e-05, + "loss": 1.2194, + "num_input_tokens_seen": 234669296, + "step": 3543 + }, + { + "epoch": 0.33163289184256095, + "loss": 1.207371473312378, + "loss_ce": 0.006199490278959274, + "loss_iou": 0.494140625, + "loss_num": 0.04296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 234669296, + "step": 3543 + }, + { + "epoch": 0.33172649412645666, + "grad_norm": 17.607336044311523, + "learning_rate": 5e-05, + "loss": 1.2414, + "num_input_tokens_seen": 234736068, + "step": 3544 + }, + { + "epoch": 0.33172649412645666, + "loss": 1.2859746217727661, + "loss_ce": 0.004724621307104826, + "loss_iou": 0.55078125, + "loss_num": 0.03564453125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 234736068, + "step": 3544 + }, + { + "epoch": 0.3318200964103524, + "grad_norm": 40.402427673339844, + "learning_rate": 5e-05, + "loss": 1.115, + "num_input_tokens_seen": 234801856, + "step": 3545 + }, + { + "epoch": 0.3318200964103524, + "loss": 1.352484941482544, + "loss_ce": 0.005317067727446556, + "loss_iou": 0.546875, + "loss_num": 0.050537109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 234801856, + "step": 3545 + }, + { + "epoch": 0.33191369869424814, + "grad_norm": 38.34578323364258, + "learning_rate": 5e-05, + "loss": 1.4438, + "num_input_tokens_seen": 234868392, + "step": 3546 + }, + { + "epoch": 0.33191369869424814, + "loss": 1.2742551565170288, + "loss_ce": 0.009606714360415936, + "loss_iou": 0.5, + "loss_num": 0.052978515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 234868392, + "step": 3546 + }, + { + "epoch": 0.33200730097814385, + "grad_norm": 26.828020095825195, + "learning_rate": 5e-05, + "loss": 1.7167, + "num_input_tokens_seen": 234934924, + "step": 3547 + }, + { + "epoch": 0.33200730097814385, + "loss": 1.8022754192352295, + "loss_ce": 0.004423964768648148, + "loss_iou": 0.7578125, + "loss_num": 0.056396484375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 234934924, + "step": 3547 + }, + { + "epoch": 0.3321009032620396, + "grad_norm": 73.9911880493164, + "learning_rate": 5e-05, + "loss": 1.4153, + "num_input_tokens_seen": 235001172, + "step": 3548 + }, + { + "epoch": 0.3321009032620396, + "loss": 1.5545170307159424, + "loss_ce": 0.002759157679975033, + "loss_iou": 0.578125, + "loss_num": 0.07958984375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 235001172, + "step": 3548 + }, + { + "epoch": 0.3321945055459353, + "grad_norm": 12.100743293762207, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 235067556, + "step": 3549 + }, + { + "epoch": 0.3321945055459353, + "loss": 1.3225867748260498, + "loss_ce": 0.009110218845307827, + "loss_iou": 0.515625, + "loss_num": 0.056396484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 235067556, + "step": 3549 + }, + { + "epoch": 0.33228810782983104, + "grad_norm": 38.98204803466797, + "learning_rate": 5e-05, + "loss": 1.1906, + "num_input_tokens_seen": 235133284, + "step": 3550 + }, + { + "epoch": 0.33228810782983104, + "loss": 1.3154993057250977, + "loss_ce": 0.014718085527420044, + "loss_iou": 0.4921875, + "loss_num": 0.06298828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 235133284, + "step": 3550 + }, + { + "epoch": 0.3323817101137268, + "grad_norm": 19.8131160736084, + "learning_rate": 5e-05, + "loss": 1.5927, + "num_input_tokens_seen": 235199728, + "step": 3551 + }, + { + "epoch": 0.3323817101137268, + "loss": 1.679270625114441, + "loss_ce": 0.005442567635327578, + "loss_iou": 0.62890625, + "loss_num": 0.08349609375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 235199728, + "step": 3551 + }, + { + "epoch": 0.3324753123976225, + "grad_norm": 34.68616485595703, + "learning_rate": 5e-05, + "loss": 1.3493, + "num_input_tokens_seen": 235266408, + "step": 3552 + }, + { + "epoch": 0.3324753123976225, + "loss": 1.4741802215576172, + "loss_ce": 0.007383421529084444, + "loss_iou": 0.578125, + "loss_num": 0.06201171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 235266408, + "step": 3552 + }, + { + "epoch": 0.3325689146815182, + "grad_norm": 25.491535186767578, + "learning_rate": 5e-05, + "loss": 1.3788, + "num_input_tokens_seen": 235330740, + "step": 3553 + }, + { + "epoch": 0.3325689146815182, + "loss": 1.0573961734771729, + "loss_ce": 0.004173534922301769, + "loss_iou": 0.453125, + "loss_num": 0.0294189453125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 235330740, + "step": 3553 + }, + { + "epoch": 0.33266251696541393, + "grad_norm": 18.788177490234375, + "learning_rate": 5e-05, + "loss": 1.1589, + "num_input_tokens_seen": 235396160, + "step": 3554 + }, + { + "epoch": 0.33266251696541393, + "loss": 1.2340214252471924, + "loss_ce": 0.006970584392547607, + "loss_iou": 0.48828125, + "loss_num": 0.0498046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 235396160, + "step": 3554 + }, + { + "epoch": 0.3327561192493097, + "grad_norm": 22.256479263305664, + "learning_rate": 5e-05, + "loss": 1.3504, + "num_input_tokens_seen": 235461560, + "step": 3555 + }, + { + "epoch": 0.3327561192493097, + "loss": 1.5194861888885498, + "loss_ce": 0.004349506925791502, + "loss_iou": 0.5703125, + "loss_num": 0.07421875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 235461560, + "step": 3555 + }, + { + "epoch": 0.3328497215332054, + "grad_norm": 29.759389877319336, + "learning_rate": 5e-05, + "loss": 1.2314, + "num_input_tokens_seen": 235527944, + "step": 3556 + }, + { + "epoch": 0.3328497215332054, + "loss": 1.3256677389144897, + "loss_ce": 0.007796605117619038, + "loss_iou": 0.53515625, + "loss_num": 0.050048828125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 235527944, + "step": 3556 + }, + { + "epoch": 0.3329433238171011, + "grad_norm": 25.504209518432617, + "learning_rate": 5e-05, + "loss": 1.3557, + "num_input_tokens_seen": 235593500, + "step": 3557 + }, + { + "epoch": 0.3329433238171011, + "loss": 1.3016202449798584, + "loss_ce": 0.005721805617213249, + "loss_iou": 0.57421875, + "loss_num": 0.029296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 235593500, + "step": 3557 + }, + { + "epoch": 0.3330369261009969, + "grad_norm": 14.457237243652344, + "learning_rate": 5e-05, + "loss": 1.4084, + "num_input_tokens_seen": 235659492, + "step": 3558 + }, + { + "epoch": 0.3330369261009969, + "loss": 1.2264822721481323, + "loss_ce": 0.007244064472615719, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 235659492, + "step": 3558 + }, + { + "epoch": 0.3331305283848926, + "grad_norm": 29.743091583251953, + "learning_rate": 5e-05, + "loss": 1.0858, + "num_input_tokens_seen": 235725756, + "step": 3559 + }, + { + "epoch": 0.3331305283848926, + "loss": 0.768072247505188, + "loss_ce": 0.007543675601482391, + "loss_iou": 0.32421875, + "loss_num": 0.0223388671875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 235725756, + "step": 3559 + }, + { + "epoch": 0.3332241306687883, + "grad_norm": 19.524080276489258, + "learning_rate": 5e-05, + "loss": 1.1747, + "num_input_tokens_seen": 235791336, + "step": 3560 + }, + { + "epoch": 0.3332241306687883, + "loss": 1.2219960689544678, + "loss_ce": 0.004222680814564228, + "loss_iou": 0.51171875, + "loss_num": 0.03955078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 235791336, + "step": 3560 + }, + { + "epoch": 0.333317732952684, + "grad_norm": 18.797630310058594, + "learning_rate": 5e-05, + "loss": 1.3116, + "num_input_tokens_seen": 235858364, + "step": 3561 + }, + { + "epoch": 0.333317732952684, + "loss": 1.25065016746521, + "loss_ce": 0.005533017683774233, + "loss_iou": 0.53515625, + "loss_num": 0.034912109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 235858364, + "step": 3561 + }, + { + "epoch": 0.3334113352365798, + "grad_norm": 15.471199035644531, + "learning_rate": 5e-05, + "loss": 1.1297, + "num_input_tokens_seen": 235925348, + "step": 3562 + }, + { + "epoch": 0.3334113352365798, + "loss": 0.9816075563430786, + "loss_ce": 0.004068493843078613, + "loss_iou": 0.435546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 235925348, + "step": 3562 + }, + { + "epoch": 0.3335049375204755, + "grad_norm": 59.01436233520508, + "learning_rate": 5e-05, + "loss": 1.1061, + "num_input_tokens_seen": 235989724, + "step": 3563 + }, + { + "epoch": 0.3335049375204755, + "loss": 1.0877387523651123, + "loss_ce": 0.01107865758240223, + "loss_iou": 0.4296875, + "loss_num": 0.04345703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 235989724, + "step": 3563 + }, + { + "epoch": 0.3335985398043712, + "grad_norm": 19.76849365234375, + "learning_rate": 5e-05, + "loss": 1.3762, + "num_input_tokens_seen": 236055836, + "step": 3564 + }, + { + "epoch": 0.3335985398043712, + "loss": 1.3018929958343506, + "loss_ce": 0.00428555253893137, + "loss_iou": 0.4921875, + "loss_num": 0.06298828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 236055836, + "step": 3564 + }, + { + "epoch": 0.333692142088267, + "grad_norm": 42.39924240112305, + "learning_rate": 5e-05, + "loss": 1.3063, + "num_input_tokens_seen": 236121944, + "step": 3565 + }, + { + "epoch": 0.333692142088267, + "loss": 1.2431130409240723, + "loss_ce": 0.009226251393556595, + "loss_iou": 0.55859375, + "loss_num": 0.023193359375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 236121944, + "step": 3565 + }, + { + "epoch": 0.3337857443721627, + "grad_norm": 53.350154876708984, + "learning_rate": 5e-05, + "loss": 1.5743, + "num_input_tokens_seen": 236187768, + "step": 3566 + }, + { + "epoch": 0.3337857443721627, + "loss": 1.5111441612243652, + "loss_ce": 0.0028433436527848244, + "loss_iou": 0.671875, + "loss_num": 0.033203125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 236187768, + "step": 3566 + }, + { + "epoch": 0.3338793466560584, + "grad_norm": 25.451549530029297, + "learning_rate": 5e-05, + "loss": 1.3672, + "num_input_tokens_seen": 236254516, + "step": 3567 + }, + { + "epoch": 0.3338793466560584, + "loss": 1.3634276390075684, + "loss_ce": 0.0050291623920202255, + "loss_iou": 0.5703125, + "loss_num": 0.044677734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 236254516, + "step": 3567 + }, + { + "epoch": 0.33397294893995416, + "grad_norm": 36.89323043823242, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 236320584, + "step": 3568 + }, + { + "epoch": 0.33397294893995416, + "loss": 1.2182739973068237, + "loss_ce": 0.009777914732694626, + "loss_iou": 0.470703125, + "loss_num": 0.0537109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 236320584, + "step": 3568 + }, + { + "epoch": 0.3340665512238499, + "grad_norm": 24.950538635253906, + "learning_rate": 5e-05, + "loss": 1.1386, + "num_input_tokens_seen": 236387368, + "step": 3569 + }, + { + "epoch": 0.3340665512238499, + "loss": 1.2318916320800781, + "loss_ce": 0.00777044054120779, + "loss_iou": 0.51171875, + "loss_num": 0.041015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 236387368, + "step": 3569 + }, + { + "epoch": 0.3341601535077456, + "grad_norm": 28.292020797729492, + "learning_rate": 5e-05, + "loss": 1.4136, + "num_input_tokens_seen": 236454328, + "step": 3570 + }, + { + "epoch": 0.3341601535077456, + "loss": 1.3414260149002075, + "loss_ce": 0.0069532981142401695, + "loss_iou": 0.5234375, + "loss_num": 0.0576171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 236454328, + "step": 3570 + }, + { + "epoch": 0.3342537557916413, + "grad_norm": 35.358177185058594, + "learning_rate": 5e-05, + "loss": 1.4104, + "num_input_tokens_seen": 236519344, + "step": 3571 + }, + { + "epoch": 0.3342537557916413, + "loss": 1.4984736442565918, + "loss_ce": 0.004821363370865583, + "loss_iou": 0.6328125, + "loss_num": 0.044921875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 236519344, + "step": 3571 + }, + { + "epoch": 0.33434735807553706, + "grad_norm": 34.11064910888672, + "learning_rate": 5e-05, + "loss": 1.4699, + "num_input_tokens_seen": 236585556, + "step": 3572 + }, + { + "epoch": 0.33434735807553706, + "loss": 1.519061803817749, + "loss_ce": 0.00294864852912724, + "loss_iou": 0.59375, + "loss_num": 0.0654296875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 236585556, + "step": 3572 + }, + { + "epoch": 0.33444096035943277, + "grad_norm": 25.50047492980957, + "learning_rate": 5e-05, + "loss": 1.2505, + "num_input_tokens_seen": 236651276, + "step": 3573 + }, + { + "epoch": 0.33444096035943277, + "loss": 1.2307844161987305, + "loss_ce": 0.006175089627504349, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 236651276, + "step": 3573 + }, + { + "epoch": 0.3345345626433285, + "grad_norm": 21.302656173706055, + "learning_rate": 5e-05, + "loss": 1.3939, + "num_input_tokens_seen": 236717856, + "step": 3574 + }, + { + "epoch": 0.3345345626433285, + "loss": 1.4176924228668213, + "loss_ce": 0.005582941696047783, + "loss_iou": 0.546875, + "loss_num": 0.06396484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 236717856, + "step": 3574 + }, + { + "epoch": 0.33462816492722425, + "grad_norm": 69.69845581054688, + "learning_rate": 5e-05, + "loss": 1.1162, + "num_input_tokens_seen": 236784504, + "step": 3575 + }, + { + "epoch": 0.33462816492722425, + "loss": 1.1865599155426025, + "loss_ce": 0.006384147331118584, + "loss_iou": 0.50390625, + "loss_num": 0.03515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 236784504, + "step": 3575 + }, + { + "epoch": 0.33472176721111996, + "grad_norm": 37.370361328125, + "learning_rate": 5e-05, + "loss": 1.2367, + "num_input_tokens_seen": 236849824, + "step": 3576 + }, + { + "epoch": 0.33472176721111996, + "loss": 1.2631953954696655, + "loss_ce": 0.001476649777032435, + "loss_iou": 0.52734375, + "loss_num": 0.042236328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 236849824, + "step": 3576 + }, + { + "epoch": 0.33481536949501567, + "grad_norm": 34.37902069091797, + "learning_rate": 5e-05, + "loss": 1.416, + "num_input_tokens_seen": 236914832, + "step": 3577 + }, + { + "epoch": 0.33481536949501567, + "loss": 1.3424489498138428, + "loss_ce": 0.006023182068020105, + "loss_iou": 0.54296875, + "loss_num": 0.050048828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 236914832, + "step": 3577 + }, + { + "epoch": 0.3349089717789114, + "grad_norm": 25.483211517333984, + "learning_rate": 5e-05, + "loss": 1.3453, + "num_input_tokens_seen": 236980828, + "step": 3578 + }, + { + "epoch": 0.3349089717789114, + "loss": 1.3032865524291992, + "loss_ce": 0.010806070640683174, + "loss_iou": 0.52734375, + "loss_num": 0.0478515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 236980828, + "step": 3578 + }, + { + "epoch": 0.33500257406280715, + "grad_norm": 16.853036880493164, + "learning_rate": 5e-05, + "loss": 1.4174, + "num_input_tokens_seen": 237046216, + "step": 3579 + }, + { + "epoch": 0.33500257406280715, + "loss": 1.5309834480285645, + "loss_ce": 0.007545987144112587, + "loss_iou": 0.625, + "loss_num": 0.055419921875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 237046216, + "step": 3579 + }, + { + "epoch": 0.33509617634670286, + "grad_norm": 20.91730499267578, + "learning_rate": 5e-05, + "loss": 1.2171, + "num_input_tokens_seen": 237112608, + "step": 3580 + }, + { + "epoch": 0.33509617634670286, + "loss": 1.0730029344558716, + "loss_ce": 0.005498023703694344, + "loss_iou": 0.453125, + "loss_num": 0.031982421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 237112608, + "step": 3580 + }, + { + "epoch": 0.33518977863059857, + "grad_norm": 18.459970474243164, + "learning_rate": 5e-05, + "loss": 1.4292, + "num_input_tokens_seen": 237178012, + "step": 3581 + }, + { + "epoch": 0.33518977863059857, + "loss": 1.3331146240234375, + "loss_ce": 0.0040131378918886185, + "loss_iou": 0.55859375, + "loss_num": 0.04296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 237178012, + "step": 3581 + }, + { + "epoch": 0.33528338091449433, + "grad_norm": 27.11981773376465, + "learning_rate": 5e-05, + "loss": 1.4251, + "num_input_tokens_seen": 237244296, + "step": 3582 + }, + { + "epoch": 0.33528338091449433, + "loss": 1.3270272016525269, + "loss_ce": 0.013062350451946259, + "loss_iou": 0.5078125, + "loss_num": 0.060791015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 237244296, + "step": 3582 + }, + { + "epoch": 0.33537698319839004, + "grad_norm": 31.337844848632812, + "learning_rate": 5e-05, + "loss": 1.3725, + "num_input_tokens_seen": 237308300, + "step": 3583 + }, + { + "epoch": 0.33537698319839004, + "loss": 1.4365966320037842, + "loss_ce": 0.009350612759590149, + "loss_iou": 0.60546875, + "loss_num": 0.043701171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 237308300, + "step": 3583 + }, + { + "epoch": 0.33547058548228575, + "grad_norm": 25.37666893005371, + "learning_rate": 5e-05, + "loss": 1.3299, + "num_input_tokens_seen": 237373976, + "step": 3584 + }, + { + "epoch": 0.33547058548228575, + "loss": 1.2955013513565063, + "loss_ce": 0.00790372584015131, + "loss_iou": 0.50390625, + "loss_num": 0.056640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 237373976, + "step": 3584 + }, + { + "epoch": 0.3355641877661815, + "grad_norm": 19.894668579101562, + "learning_rate": 5e-05, + "loss": 1.1274, + "num_input_tokens_seen": 237440044, + "step": 3585 + }, + { + "epoch": 0.3355641877661815, + "loss": 1.1329090595245361, + "loss_ce": 0.005574485287070274, + "loss_iou": 0.470703125, + "loss_num": 0.036865234375, + "loss_xval": 1.125, + "num_input_tokens_seen": 237440044, + "step": 3585 + }, + { + "epoch": 0.33565779005007723, + "grad_norm": 31.73177146911621, + "learning_rate": 5e-05, + "loss": 1.5391, + "num_input_tokens_seen": 237506328, + "step": 3586 + }, + { + "epoch": 0.33565779005007723, + "loss": 1.4331083297729492, + "loss_ce": 0.0053739119321107864, + "loss_iou": 0.56640625, + "loss_num": 0.058349609375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 237506328, + "step": 3586 + }, + { + "epoch": 0.33575139233397294, + "grad_norm": 125.68413543701172, + "learning_rate": 5e-05, + "loss": 1.3737, + "num_input_tokens_seen": 237573624, + "step": 3587 + }, + { + "epoch": 0.33575139233397294, + "loss": 1.424976110458374, + "loss_ce": 0.006519145332276821, + "loss_iou": 0.60546875, + "loss_num": 0.041259765625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 237573624, + "step": 3587 + }, + { + "epoch": 0.33584499461786865, + "grad_norm": 140.8628387451172, + "learning_rate": 5e-05, + "loss": 1.3593, + "num_input_tokens_seen": 237640176, + "step": 3588 + }, + { + "epoch": 0.33584499461786865, + "loss": 1.4855228662490845, + "loss_ce": 0.007983766496181488, + "loss_iou": 0.6328125, + "loss_num": 0.041748046875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 237640176, + "step": 3588 + }, + { + "epoch": 0.3359385969017644, + "grad_norm": 28.658716201782227, + "learning_rate": 5e-05, + "loss": 1.5553, + "num_input_tokens_seen": 237706232, + "step": 3589 + }, + { + "epoch": 0.3359385969017644, + "loss": 1.4980051517486572, + "loss_ce": 0.00484110414981842, + "loss_iou": 0.58203125, + "loss_num": 0.06591796875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 237706232, + "step": 3589 + }, + { + "epoch": 0.33603219918566013, + "grad_norm": 39.05143737792969, + "learning_rate": 5e-05, + "loss": 1.428, + "num_input_tokens_seen": 237772796, + "step": 3590 + }, + { + "epoch": 0.33603219918566013, + "loss": 1.4196934700012207, + "loss_ce": 0.0056308722123503685, + "loss_iou": 0.57421875, + "loss_num": 0.052734375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 237772796, + "step": 3590 + }, + { + "epoch": 0.33612580146955584, + "grad_norm": 31.278472900390625, + "learning_rate": 5e-05, + "loss": 1.3124, + "num_input_tokens_seen": 237839628, + "step": 3591 + }, + { + "epoch": 0.33612580146955584, + "loss": 1.3004555702209473, + "loss_ce": 0.004068867303431034, + "loss_iou": 0.5625, + "loss_num": 0.033935546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 237839628, + "step": 3591 + }, + { + "epoch": 0.3362194037534516, + "grad_norm": 22.228548049926758, + "learning_rate": 5e-05, + "loss": 1.5376, + "num_input_tokens_seen": 237906620, + "step": 3592 + }, + { + "epoch": 0.3362194037534516, + "loss": 1.4838358163833618, + "loss_ce": 0.005320260301232338, + "loss_iou": 0.6484375, + "loss_num": 0.0361328125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 237906620, + "step": 3592 + }, + { + "epoch": 0.3363130060373473, + "grad_norm": 30.330821990966797, + "learning_rate": 5e-05, + "loss": 1.3088, + "num_input_tokens_seen": 237973000, + "step": 3593 + }, + { + "epoch": 0.3363130060373473, + "loss": 1.115976095199585, + "loss_ce": 0.008951054885983467, + "loss_iou": 0.45703125, + "loss_num": 0.03857421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 237973000, + "step": 3593 + }, + { + "epoch": 0.33640660832124303, + "grad_norm": 21.70915985107422, + "learning_rate": 5e-05, + "loss": 1.0476, + "num_input_tokens_seen": 238038964, + "step": 3594 + }, + { + "epoch": 0.33640660832124303, + "loss": 1.1406142711639404, + "loss_ce": 0.005604444537311792, + "loss_iou": 0.4765625, + "loss_num": 0.035888671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 238038964, + "step": 3594 + }, + { + "epoch": 0.33650021060513874, + "grad_norm": 57.008792877197266, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 238105212, + "step": 3595 + }, + { + "epoch": 0.33650021060513874, + "loss": 1.3627915382385254, + "loss_ce": 0.008299377746880054, + "loss_iou": 0.5625, + "loss_num": 0.04541015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 238105212, + "step": 3595 + }, + { + "epoch": 0.3365938128890345, + "grad_norm": 20.6258487701416, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 238171928, + "step": 3596 + }, + { + "epoch": 0.3365938128890345, + "loss": 1.2348411083221436, + "loss_ce": 0.007302008103579283, + "loss_iou": 0.515625, + "loss_num": 0.039794921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 238171928, + "step": 3596 + }, + { + "epoch": 0.3366874151729302, + "grad_norm": 22.634859085083008, + "learning_rate": 5e-05, + "loss": 1.3681, + "num_input_tokens_seen": 238239024, + "step": 3597 + }, + { + "epoch": 0.3366874151729302, + "loss": 1.4914647340774536, + "loss_ce": 0.00611317390576005, + "loss_iou": 0.609375, + "loss_num": 0.0537109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 238239024, + "step": 3597 + }, + { + "epoch": 0.3367810174568259, + "grad_norm": 23.674104690551758, + "learning_rate": 5e-05, + "loss": 1.2735, + "num_input_tokens_seen": 238305176, + "step": 3598 + }, + { + "epoch": 0.3367810174568259, + "loss": 1.3926182985305786, + "loss_ce": 0.004923042841255665, + "loss_iou": 0.56640625, + "loss_num": 0.051513671875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 238305176, + "step": 3598 + }, + { + "epoch": 0.3368746197407217, + "grad_norm": 22.08316421508789, + "learning_rate": 5e-05, + "loss": 1.336, + "num_input_tokens_seen": 238371956, + "step": 3599 + }, + { + "epoch": 0.3368746197407217, + "loss": 1.3277714252471924, + "loss_ce": 0.004529254510998726, + "loss_iou": 0.53125, + "loss_num": 0.0517578125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 238371956, + "step": 3599 + }, + { + "epoch": 0.3369682220246174, + "grad_norm": 23.223655700683594, + "learning_rate": 5e-05, + "loss": 1.3301, + "num_input_tokens_seen": 238438224, + "step": 3600 + }, + { + "epoch": 0.3369682220246174, + "loss": 1.1885607242584229, + "loss_ce": 0.01033812016248703, + "loss_iou": 0.474609375, + "loss_num": 0.04541015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 238438224, + "step": 3600 + }, + { + "epoch": 0.3370618243085131, + "grad_norm": 23.214136123657227, + "learning_rate": 5e-05, + "loss": 1.3091, + "num_input_tokens_seen": 238504992, + "step": 3601 + }, + { + "epoch": 0.3370618243085131, + "loss": 1.3695616722106934, + "loss_ce": 0.0062803965993225574, + "loss_iou": 0.54296875, + "loss_num": 0.05615234375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 238504992, + "step": 3601 + }, + { + "epoch": 0.3371554265924089, + "grad_norm": 32.63926696777344, + "learning_rate": 5e-05, + "loss": 1.5392, + "num_input_tokens_seen": 238570780, + "step": 3602 + }, + { + "epoch": 0.3371554265924089, + "loss": 1.5200865268707275, + "loss_ce": 0.004949766211211681, + "loss_iou": 0.62890625, + "loss_num": 0.051513671875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 238570780, + "step": 3602 + }, + { + "epoch": 0.3372490288763046, + "grad_norm": 40.30387496948242, + "learning_rate": 5e-05, + "loss": 1.6432, + "num_input_tokens_seen": 238637600, + "step": 3603 + }, + { + "epoch": 0.3372490288763046, + "loss": 1.5404798984527588, + "loss_ce": 0.010206448845565319, + "loss_iou": 0.6484375, + "loss_num": 0.04638671875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 238637600, + "step": 3603 + }, + { + "epoch": 0.3373426311602003, + "grad_norm": 22.046640396118164, + "learning_rate": 5e-05, + "loss": 1.6809, + "num_input_tokens_seen": 238703548, + "step": 3604 + }, + { + "epoch": 0.3373426311602003, + "loss": 1.6503952741622925, + "loss_ce": 0.003910881467163563, + "loss_iou": 0.70703125, + "loss_num": 0.046875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 238703548, + "step": 3604 + }, + { + "epoch": 0.337436233444096, + "grad_norm": 10.93876838684082, + "learning_rate": 5e-05, + "loss": 1.301, + "num_input_tokens_seen": 238770456, + "step": 3605 + }, + { + "epoch": 0.337436233444096, + "loss": 1.4645330905914307, + "loss_ce": 0.007501896936446428, + "loss_iou": 0.58984375, + "loss_num": 0.05517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 238770456, + "step": 3605 + }, + { + "epoch": 0.3375298357279918, + "grad_norm": 91.18254852294922, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 238837304, + "step": 3606 + }, + { + "epoch": 0.3375298357279918, + "loss": 1.205566167831421, + "loss_ce": 0.006347442977130413, + "loss_iou": 0.478515625, + "loss_num": 0.048583984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 238837304, + "step": 3606 + }, + { + "epoch": 0.3376234380118875, + "grad_norm": 32.3939323425293, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 238904392, + "step": 3607 + }, + { + "epoch": 0.3376234380118875, + "loss": 1.3657934665679932, + "loss_ce": 0.005441934801638126, + "loss_iou": 0.578125, + "loss_num": 0.04150390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 238904392, + "step": 3607 + }, + { + "epoch": 0.3377170402957832, + "grad_norm": 48.57048416137695, + "learning_rate": 5e-05, + "loss": 1.4798, + "num_input_tokens_seen": 238969948, + "step": 3608 + }, + { + "epoch": 0.3377170402957832, + "loss": 1.5589087009429932, + "loss_ce": 0.005197668448090553, + "loss_iou": 0.6328125, + "loss_num": 0.05859375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 238969948, + "step": 3608 + }, + { + "epoch": 0.33781064257967897, + "grad_norm": 25.70297622680664, + "learning_rate": 5e-05, + "loss": 1.3151, + "num_input_tokens_seen": 239035680, + "step": 3609 + }, + { + "epoch": 0.33781064257967897, + "loss": 1.46964430809021, + "loss_ce": 0.006265445612370968, + "loss_iou": 0.6015625, + "loss_num": 0.0517578125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 239035680, + "step": 3609 + }, + { + "epoch": 0.3379042448635747, + "grad_norm": 23.70758628845215, + "learning_rate": 5e-05, + "loss": 1.4996, + "num_input_tokens_seen": 239102592, + "step": 3610 + }, + { + "epoch": 0.3379042448635747, + "loss": 1.3945214748382568, + "loss_ce": 0.0058496082201600075, + "loss_iou": 0.55859375, + "loss_num": 0.0546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 239102592, + "step": 3610 + }, + { + "epoch": 0.3379978471474704, + "grad_norm": 31.09476089477539, + "learning_rate": 5e-05, + "loss": 1.3197, + "num_input_tokens_seen": 239168452, + "step": 3611 + }, + { + "epoch": 0.3379978471474704, + "loss": 1.6598644256591797, + "loss_ce": 0.00898553803563118, + "loss_iou": 0.671875, + "loss_num": 0.061767578125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 239168452, + "step": 3611 + }, + { + "epoch": 0.3380914494313661, + "grad_norm": 28.302125930786133, + "learning_rate": 5e-05, + "loss": 1.4817, + "num_input_tokens_seen": 239235148, + "step": 3612 + }, + { + "epoch": 0.3380914494313661, + "loss": 1.4059725999832153, + "loss_ce": 0.004605429247021675, + "loss_iou": 0.5859375, + "loss_num": 0.0458984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 239235148, + "step": 3612 + }, + { + "epoch": 0.33818505171526186, + "grad_norm": 35.3673095703125, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 239300912, + "step": 3613 + }, + { + "epoch": 0.33818505171526186, + "loss": 1.2753794193267822, + "loss_ce": 0.0043832845985889435, + "loss_iou": 0.515625, + "loss_num": 0.04736328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 239300912, + "step": 3613 + }, + { + "epoch": 0.3382786539991576, + "grad_norm": 28.36713981628418, + "learning_rate": 5e-05, + "loss": 1.5699, + "num_input_tokens_seen": 239367352, + "step": 3614 + }, + { + "epoch": 0.3382786539991576, + "loss": 1.4123287200927734, + "loss_ce": 0.007055333815515041, + "loss_iou": 0.5625, + "loss_num": 0.05615234375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 239367352, + "step": 3614 + }, + { + "epoch": 0.3383722562830533, + "grad_norm": 14.07665729522705, + "learning_rate": 5e-05, + "loss": 1.3271, + "num_input_tokens_seen": 239433836, + "step": 3615 + }, + { + "epoch": 0.3383722562830533, + "loss": 1.2867792844772339, + "loss_ce": 0.0065058632753789425, + "loss_iou": 0.486328125, + "loss_num": 0.06103515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 239433836, + "step": 3615 + }, + { + "epoch": 0.33846585856694905, + "grad_norm": 25.80449104309082, + "learning_rate": 5e-05, + "loss": 1.2404, + "num_input_tokens_seen": 239500736, + "step": 3616 + }, + { + "epoch": 0.33846585856694905, + "loss": 1.094995141029358, + "loss_ce": 0.005639633163809776, + "loss_iou": 0.494140625, + "loss_num": 0.02001953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 239500736, + "step": 3616 + }, + { + "epoch": 0.33855946085084476, + "grad_norm": 38.415252685546875, + "learning_rate": 5e-05, + "loss": 1.2547, + "num_input_tokens_seen": 239566704, + "step": 3617 + }, + { + "epoch": 0.33855946085084476, + "loss": 1.4759812355041504, + "loss_ce": 0.005766476970165968, + "loss_iou": 0.6171875, + "loss_num": 0.04638671875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 239566704, + "step": 3617 + }, + { + "epoch": 0.3386530631347405, + "grad_norm": 24.202896118164062, + "learning_rate": 5e-05, + "loss": 1.4835, + "num_input_tokens_seen": 239633692, + "step": 3618 + }, + { + "epoch": 0.3386530631347405, + "loss": 1.4221255779266357, + "loss_ce": 0.0022036314476281404, + "loss_iou": 0.625, + "loss_num": 0.03466796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 239633692, + "step": 3618 + }, + { + "epoch": 0.33874666541863624, + "grad_norm": 23.281497955322266, + "learning_rate": 5e-05, + "loss": 1.2984, + "num_input_tokens_seen": 239699932, + "step": 3619 + }, + { + "epoch": 0.33874666541863624, + "loss": 1.2486505508422852, + "loss_ce": 0.004021669737994671, + "loss_iou": 0.486328125, + "loss_num": 0.054443359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 239699932, + "step": 3619 + }, + { + "epoch": 0.33884026770253195, + "grad_norm": 26.224349975585938, + "learning_rate": 5e-05, + "loss": 1.1741, + "num_input_tokens_seen": 239766256, + "step": 3620 + }, + { + "epoch": 0.33884026770253195, + "loss": 1.2733521461486816, + "loss_ce": 0.006262177601456642, + "loss_iou": 0.53125, + "loss_num": 0.041748046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 239766256, + "step": 3620 + }, + { + "epoch": 0.33893386998642766, + "grad_norm": 36.211002349853516, + "learning_rate": 5e-05, + "loss": 1.5066, + "num_input_tokens_seen": 239833364, + "step": 3621 + }, + { + "epoch": 0.33893386998642766, + "loss": 1.398667812347412, + "loss_ce": 0.004136573988944292, + "loss_iou": 0.55859375, + "loss_num": 0.0546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 239833364, + "step": 3621 + }, + { + "epoch": 0.33902747227032337, + "grad_norm": 19.57887840270996, + "learning_rate": 5e-05, + "loss": 1.5203, + "num_input_tokens_seen": 239900024, + "step": 3622 + }, + { + "epoch": 0.33902747227032337, + "loss": 1.3430957794189453, + "loss_ce": 0.005205217748880386, + "loss_iou": 0.5625, + "loss_num": 0.04248046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 239900024, + "step": 3622 + }, + { + "epoch": 0.33912107455421914, + "grad_norm": 21.123933792114258, + "learning_rate": 5e-05, + "loss": 1.3294, + "num_input_tokens_seen": 239966632, + "step": 3623 + }, + { + "epoch": 0.33912107455421914, + "loss": 1.5254234075546265, + "loss_ce": 0.0024741683155298233, + "loss_iou": 0.6171875, + "loss_num": 0.05859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 239966632, + "step": 3623 + }, + { + "epoch": 0.33921467683811485, + "grad_norm": 23.80128288269043, + "learning_rate": 5e-05, + "loss": 1.3695, + "num_input_tokens_seen": 240033332, + "step": 3624 + }, + { + "epoch": 0.33921467683811485, + "loss": 1.3639514446258545, + "loss_ce": 0.006041194777935743, + "loss_iou": 0.56640625, + "loss_num": 0.044921875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 240033332, + "step": 3624 + }, + { + "epoch": 0.33930827912201056, + "grad_norm": 20.2900390625, + "learning_rate": 5e-05, + "loss": 1.3114, + "num_input_tokens_seen": 240099780, + "step": 3625 + }, + { + "epoch": 0.33930827912201056, + "loss": 1.3428840637207031, + "loss_ce": 0.008411416783928871, + "loss_iou": 0.51953125, + "loss_num": 0.058349609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 240099780, + "step": 3625 + }, + { + "epoch": 0.3394018814059063, + "grad_norm": 47.4752082824707, + "learning_rate": 5e-05, + "loss": 1.1964, + "num_input_tokens_seen": 240165780, + "step": 3626 + }, + { + "epoch": 0.3394018814059063, + "loss": 1.3032163381576538, + "loss_ce": 0.005853088106960058, + "loss_iou": 0.546875, + "loss_num": 0.041015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 240165780, + "step": 3626 + }, + { + "epoch": 0.33949548368980204, + "grad_norm": 40.83226013183594, + "learning_rate": 5e-05, + "loss": 1.4323, + "num_input_tokens_seen": 240232156, + "step": 3627 + }, + { + "epoch": 0.33949548368980204, + "loss": 1.4385390281677246, + "loss_ce": 0.004945231601595879, + "loss_iou": 0.61328125, + "loss_num": 0.041748046875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 240232156, + "step": 3627 + }, + { + "epoch": 0.33958908597369775, + "grad_norm": 28.174959182739258, + "learning_rate": 5e-05, + "loss": 1.2689, + "num_input_tokens_seen": 240298552, + "step": 3628 + }, + { + "epoch": 0.33958908597369775, + "loss": 1.3345706462860107, + "loss_ce": 0.008398830890655518, + "loss_iou": 0.5859375, + "loss_num": 0.03125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 240298552, + "step": 3628 + }, + { + "epoch": 0.3396826882575935, + "grad_norm": 27.069580078125, + "learning_rate": 5e-05, + "loss": 1.4573, + "num_input_tokens_seen": 240364116, + "step": 3629 + }, + { + "epoch": 0.3396826882575935, + "loss": 1.4937152862548828, + "loss_ce": 0.002504334319382906, + "loss_iou": 0.62109375, + "loss_num": 0.050537109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 240364116, + "step": 3629 + }, + { + "epoch": 0.3397762905414892, + "grad_norm": 28.511459350585938, + "learning_rate": 5e-05, + "loss": 1.3544, + "num_input_tokens_seen": 240431792, + "step": 3630 + }, + { + "epoch": 0.3397762905414892, + "loss": 1.3059096336364746, + "loss_ce": 0.004151838831603527, + "loss_iou": 0.5234375, + "loss_num": 0.051513671875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 240431792, + "step": 3630 + }, + { + "epoch": 0.33986989282538493, + "grad_norm": 26.77628517150879, + "learning_rate": 5e-05, + "loss": 1.4932, + "num_input_tokens_seen": 240498624, + "step": 3631 + }, + { + "epoch": 0.33986989282538493, + "loss": 1.4641647338867188, + "loss_ce": 0.007621804252266884, + "loss_iou": 0.59375, + "loss_num": 0.053466796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 240498624, + "step": 3631 + }, + { + "epoch": 0.33996349510928064, + "grad_norm": 24.17523765563965, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 240565364, + "step": 3632 + }, + { + "epoch": 0.33996349510928064, + "loss": 1.3431411981582642, + "loss_ce": 0.009156826883554459, + "loss_iou": 0.578125, + "loss_num": 0.0361328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 240565364, + "step": 3632 + }, + { + "epoch": 0.3400570973931764, + "grad_norm": 22.559850692749023, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 240631580, + "step": 3633 + }, + { + "epoch": 0.3400570973931764, + "loss": 1.2266149520874023, + "loss_ce": 0.007864905521273613, + "loss_iou": 0.48046875, + "loss_num": 0.05126953125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 240631580, + "step": 3633 + }, + { + "epoch": 0.3401506996770721, + "grad_norm": 37.38408660888672, + "learning_rate": 5e-05, + "loss": 1.2722, + "num_input_tokens_seen": 240697760, + "step": 3634 + }, + { + "epoch": 0.3401506996770721, + "loss": 1.2712650299072266, + "loss_ce": 0.005640106741338968, + "loss_iou": 0.53125, + "loss_num": 0.041259765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 240697760, + "step": 3634 + }, + { + "epoch": 0.34024430196096783, + "grad_norm": 27.79058265686035, + "learning_rate": 5e-05, + "loss": 1.3727, + "num_input_tokens_seen": 240763544, + "step": 3635 + }, + { + "epoch": 0.34024430196096783, + "loss": 1.2752653360366821, + "loss_ce": 0.006405411288142204, + "loss_iou": 0.5078125, + "loss_num": 0.05029296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 240763544, + "step": 3635 + }, + { + "epoch": 0.3403379042448636, + "grad_norm": 19.13265037536621, + "learning_rate": 5e-05, + "loss": 1.1719, + "num_input_tokens_seen": 240829732, + "step": 3636 + }, + { + "epoch": 0.3403379042448636, + "loss": 1.0187313556671143, + "loss_ce": 0.007500950712710619, + "loss_iou": 0.396484375, + "loss_num": 0.04345703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 240829732, + "step": 3636 + }, + { + "epoch": 0.3404315065287593, + "grad_norm": 18.1207218170166, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 240895856, + "step": 3637 + }, + { + "epoch": 0.3404315065287593, + "loss": 1.4831037521362305, + "loss_ce": 0.008006195537745953, + "loss_iou": 0.6015625, + "loss_num": 0.0546875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 240895856, + "step": 3637 + }, + { + "epoch": 0.340525108812655, + "grad_norm": 17.262310028076172, + "learning_rate": 5e-05, + "loss": 1.0922, + "num_input_tokens_seen": 240961812, + "step": 3638 + }, + { + "epoch": 0.340525108812655, + "loss": 0.9637893438339233, + "loss_ce": 0.006147765088826418, + "loss_iou": 0.408203125, + "loss_num": 0.0283203125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 240961812, + "step": 3638 + }, + { + "epoch": 0.34061871109655073, + "grad_norm": 22.514741897583008, + "learning_rate": 5e-05, + "loss": 1.2908, + "num_input_tokens_seen": 241026360, + "step": 3639 + }, + { + "epoch": 0.34061871109655073, + "loss": 1.1033295392990112, + "loss_ce": 0.004422076512128115, + "loss_iou": 0.38671875, + "loss_num": 0.0654296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 241026360, + "step": 3639 + }, + { + "epoch": 0.3407123133804465, + "grad_norm": 30.53278350830078, + "learning_rate": 5e-05, + "loss": 1.2993, + "num_input_tokens_seen": 241093012, + "step": 3640 + }, + { + "epoch": 0.3407123133804465, + "loss": 1.4144235849380493, + "loss_ce": 0.0042672958225011826, + "loss_iou": 0.55078125, + "loss_num": 0.06201171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 241093012, + "step": 3640 + }, + { + "epoch": 0.3408059156643422, + "grad_norm": 92.73212432861328, + "learning_rate": 5e-05, + "loss": 1.2742, + "num_input_tokens_seen": 241159044, + "step": 3641 + }, + { + "epoch": 0.3408059156643422, + "loss": 1.4088337421417236, + "loss_ce": 0.006978189572691917, + "loss_iou": 0.5859375, + "loss_num": 0.04541015625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 241159044, + "step": 3641 + }, + { + "epoch": 0.3408995179482379, + "grad_norm": 46.380210876464844, + "learning_rate": 5e-05, + "loss": 1.4636, + "num_input_tokens_seen": 241225988, + "step": 3642 + }, + { + "epoch": 0.3408995179482379, + "loss": 1.8216707706451416, + "loss_ce": 0.006241173017770052, + "loss_iou": 0.734375, + "loss_num": 0.068359375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 241225988, + "step": 3642 + }, + { + "epoch": 0.3409931202321337, + "grad_norm": 32.543819427490234, + "learning_rate": 5e-05, + "loss": 1.1491, + "num_input_tokens_seen": 241292644, + "step": 3643 + }, + { + "epoch": 0.3409931202321337, + "loss": 1.2192710638046265, + "loss_ce": 0.006868735421448946, + "loss_iou": 0.490234375, + "loss_num": 0.0458984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 241292644, + "step": 3643 + }, + { + "epoch": 0.3410867225160294, + "grad_norm": 89.16029357910156, + "learning_rate": 5e-05, + "loss": 1.8464, + "num_input_tokens_seen": 241359000, + "step": 3644 + }, + { + "epoch": 0.3410867225160294, + "loss": 1.9885772466659546, + "loss_ce": 0.004202256910502911, + "loss_iou": 0.76171875, + "loss_num": 0.091796875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 241359000, + "step": 3644 + }, + { + "epoch": 0.3411803247999251, + "grad_norm": 14.490792274475098, + "learning_rate": 5e-05, + "loss": 1.3464, + "num_input_tokens_seen": 241425052, + "step": 3645 + }, + { + "epoch": 0.3411803247999251, + "loss": 1.4264261722564697, + "loss_ce": 0.009434008970856667, + "loss_iou": 0.5546875, + "loss_num": 0.062255859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 241425052, + "step": 3645 + }, + { + "epoch": 0.34127392708382087, + "grad_norm": 20.671415328979492, + "learning_rate": 5e-05, + "loss": 1.0687, + "num_input_tokens_seen": 241490788, + "step": 3646 + }, + { + "epoch": 0.34127392708382087, + "loss": 0.9522599577903748, + "loss_ce": 0.0050554098561406136, + "loss_iou": 0.376953125, + "loss_num": 0.038330078125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 241490788, + "step": 3646 + }, + { + "epoch": 0.3413675293677166, + "grad_norm": 31.99077606201172, + "learning_rate": 5e-05, + "loss": 1.3249, + "num_input_tokens_seen": 241557120, + "step": 3647 + }, + { + "epoch": 0.3413675293677166, + "loss": 1.1573506593704224, + "loss_ce": 0.007936619222164154, + "loss_iou": 0.4609375, + "loss_num": 0.04541015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 241557120, + "step": 3647 + }, + { + "epoch": 0.3414611316516123, + "grad_norm": 41.82197570800781, + "learning_rate": 5e-05, + "loss": 1.5867, + "num_input_tokens_seen": 241624184, + "step": 3648 + }, + { + "epoch": 0.3414611316516123, + "loss": 1.692509651184082, + "loss_ce": 0.005986269097775221, + "loss_iou": 0.7421875, + "loss_num": 0.041015625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 241624184, + "step": 3648 + }, + { + "epoch": 0.341554733935508, + "grad_norm": 35.49443435668945, + "learning_rate": 5e-05, + "loss": 1.6186, + "num_input_tokens_seen": 241690640, + "step": 3649 + }, + { + "epoch": 0.341554733935508, + "loss": 1.6188666820526123, + "loss_ce": 0.01281813345849514, + "loss_iou": 0.6953125, + "loss_num": 0.0419921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 241690640, + "step": 3649 + }, + { + "epoch": 0.34164833621940377, + "grad_norm": 13.038924217224121, + "learning_rate": 5e-05, + "loss": 1.4472, + "num_input_tokens_seen": 241756420, + "step": 3650 + }, + { + "epoch": 0.34164833621940377, + "loss": 1.6883447170257568, + "loss_ce": 0.005727503448724747, + "loss_iou": 0.671875, + "loss_num": 0.06689453125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 241756420, + "step": 3650 + }, + { + "epoch": 0.3417419385032995, + "grad_norm": 25.877485275268555, + "learning_rate": 5e-05, + "loss": 1.2876, + "num_input_tokens_seen": 241822736, + "step": 3651 + }, + { + "epoch": 0.3417419385032995, + "loss": 1.2723809480667114, + "loss_ce": 0.0033379076048731804, + "loss_iou": 0.52734375, + "loss_num": 0.042236328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 241822736, + "step": 3651 + }, + { + "epoch": 0.3418355407871952, + "grad_norm": 23.364559173583984, + "learning_rate": 5e-05, + "loss": 1.4992, + "num_input_tokens_seen": 241889040, + "step": 3652 + }, + { + "epoch": 0.3418355407871952, + "loss": 1.4105358123779297, + "loss_ce": 0.007703776005655527, + "loss_iou": 0.59375, + "loss_num": 0.04345703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 241889040, + "step": 3652 + }, + { + "epoch": 0.34192914307109096, + "grad_norm": 36.80274963378906, + "learning_rate": 5e-05, + "loss": 1.2887, + "num_input_tokens_seen": 241954656, + "step": 3653 + }, + { + "epoch": 0.34192914307109096, + "loss": 1.308194637298584, + "loss_ce": 0.007413332350552082, + "loss_iou": 0.51953125, + "loss_num": 0.052490234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 241954656, + "step": 3653 + }, + { + "epoch": 0.34202274535498667, + "grad_norm": 87.49969482421875, + "learning_rate": 5e-05, + "loss": 1.5953, + "num_input_tokens_seen": 242020348, + "step": 3654 + }, + { + "epoch": 0.34202274535498667, + "loss": 1.5683457851409912, + "loss_ce": 0.004869316704571247, + "loss_iou": 0.66015625, + "loss_num": 0.049072265625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 242020348, + "step": 3654 + }, + { + "epoch": 0.3421163476388824, + "grad_norm": 25.59351921081543, + "learning_rate": 5e-05, + "loss": 1.2982, + "num_input_tokens_seen": 242087588, + "step": 3655 + }, + { + "epoch": 0.3421163476388824, + "loss": 1.184438943862915, + "loss_ce": 0.004263220354914665, + "loss_iou": 0.49609375, + "loss_num": 0.037841796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 242087588, + "step": 3655 + }, + { + "epoch": 0.3422099499227781, + "grad_norm": 21.046737670898438, + "learning_rate": 5e-05, + "loss": 1.4828, + "num_input_tokens_seen": 242153320, + "step": 3656 + }, + { + "epoch": 0.3422099499227781, + "loss": 1.4882756471633911, + "loss_ce": 0.0029240858275443316, + "loss_iou": 0.5859375, + "loss_num": 0.0625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 242153320, + "step": 3656 + }, + { + "epoch": 0.34230355220667386, + "grad_norm": 46.43707275390625, + "learning_rate": 5e-05, + "loss": 1.2706, + "num_input_tokens_seen": 242219364, + "step": 3657 + }, + { + "epoch": 0.34230355220667386, + "loss": 0.9916031360626221, + "loss_ce": 0.004908815957605839, + "loss_iou": 0.396484375, + "loss_num": 0.0390625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 242219364, + "step": 3657 + }, + { + "epoch": 0.34239715449056957, + "grad_norm": 27.1610107421875, + "learning_rate": 5e-05, + "loss": 1.3813, + "num_input_tokens_seen": 242285964, + "step": 3658 + }, + { + "epoch": 0.34239715449056957, + "loss": 1.4464139938354492, + "loss_ce": 0.009402213618159294, + "loss_iou": 0.61328125, + "loss_num": 0.04248046875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 242285964, + "step": 3658 + }, + { + "epoch": 0.3424907567744653, + "grad_norm": 13.915948867797852, + "learning_rate": 5e-05, + "loss": 1.0621, + "num_input_tokens_seen": 242352340, + "step": 3659 + }, + { + "epoch": 0.3424907567744653, + "loss": 1.1372686624526978, + "loss_ce": 0.005920969881117344, + "loss_iou": 0.46875, + "loss_num": 0.038818359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 242352340, + "step": 3659 + }, + { + "epoch": 0.34258435905836104, + "grad_norm": 18.808433532714844, + "learning_rate": 5e-05, + "loss": 1.4462, + "num_input_tokens_seen": 242419068, + "step": 3660 + }, + { + "epoch": 0.34258435905836104, + "loss": 1.2946178913116455, + "loss_ce": 0.007508429698646069, + "loss_iou": 0.5234375, + "loss_num": 0.04833984375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 242419068, + "step": 3660 + }, + { + "epoch": 0.34267796134225675, + "grad_norm": 29.82279396057129, + "learning_rate": 5e-05, + "loss": 1.2986, + "num_input_tokens_seen": 242485832, + "step": 3661 + }, + { + "epoch": 0.34267796134225675, + "loss": 1.252081036567688, + "loss_ce": 0.005010711029171944, + "loss_iou": 0.53515625, + "loss_num": 0.035400390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 242485832, + "step": 3661 + }, + { + "epoch": 0.34277156362615246, + "grad_norm": 28.067873001098633, + "learning_rate": 5e-05, + "loss": 1.5593, + "num_input_tokens_seen": 242552972, + "step": 3662 + }, + { + "epoch": 0.34277156362615246, + "loss": 1.4852641820907593, + "loss_ce": 0.0038188102189451456, + "loss_iou": 0.6015625, + "loss_num": 0.054931640625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 242552972, + "step": 3662 + }, + { + "epoch": 0.34286516591004823, + "grad_norm": 36.84526824951172, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 242618832, + "step": 3663 + }, + { + "epoch": 0.34286516591004823, + "loss": 1.2323442697525024, + "loss_ce": 0.00431685009971261, + "loss_iou": 0.46875, + "loss_num": 0.05859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 242618832, + "step": 3663 + }, + { + "epoch": 0.34295876819394394, + "grad_norm": 17.441896438598633, + "learning_rate": 5e-05, + "loss": 1.4414, + "num_input_tokens_seen": 242684976, + "step": 3664 + }, + { + "epoch": 0.34295876819394394, + "loss": 1.5163402557373047, + "loss_ce": 0.0038890610449016094, + "loss_iou": 0.609375, + "loss_num": 0.05810546875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 242684976, + "step": 3664 + }, + { + "epoch": 0.34305237047783965, + "grad_norm": 28.150863647460938, + "learning_rate": 5e-05, + "loss": 1.3833, + "num_input_tokens_seen": 242751128, + "step": 3665 + }, + { + "epoch": 0.34305237047783965, + "loss": 1.174673318862915, + "loss_ce": 0.008169415406882763, + "loss_iou": 0.48828125, + "loss_num": 0.038330078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 242751128, + "step": 3665 + }, + { + "epoch": 0.34314597276173536, + "grad_norm": 29.392072677612305, + "learning_rate": 5e-05, + "loss": 1.4848, + "num_input_tokens_seen": 242817620, + "step": 3666 + }, + { + "epoch": 0.34314597276173536, + "loss": 1.4709686040878296, + "loss_ce": 0.003195198019966483, + "loss_iou": 0.55859375, + "loss_num": 0.0703125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 242817620, + "step": 3666 + }, + { + "epoch": 0.34323957504563113, + "grad_norm": 23.619131088256836, + "learning_rate": 5e-05, + "loss": 1.4531, + "num_input_tokens_seen": 242883984, + "step": 3667 + }, + { + "epoch": 0.34323957504563113, + "loss": 1.5173687934875488, + "loss_ce": 0.007603058125823736, + "loss_iou": 0.62109375, + "loss_num": 0.053955078125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 242883984, + "step": 3667 + }, + { + "epoch": 0.34333317732952684, + "grad_norm": 118.72444152832031, + "learning_rate": 5e-05, + "loss": 1.589, + "num_input_tokens_seen": 242951052, + "step": 3668 + }, + { + "epoch": 0.34333317732952684, + "loss": 1.6637331247329712, + "loss_ce": 0.005529981106519699, + "loss_iou": 0.65625, + "loss_num": 0.06884765625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 242951052, + "step": 3668 + }, + { + "epoch": 0.34342677961342255, + "grad_norm": 63.13013458251953, + "learning_rate": 5e-05, + "loss": 1.5791, + "num_input_tokens_seen": 243017768, + "step": 3669 + }, + { + "epoch": 0.34342677961342255, + "loss": 1.6526031494140625, + "loss_ce": 0.009048559702932835, + "loss_iou": 0.69140625, + "loss_num": 0.052001953125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 243017768, + "step": 3669 + }, + { + "epoch": 0.3435203818973183, + "grad_norm": 25.43216323852539, + "learning_rate": 5e-05, + "loss": 1.1632, + "num_input_tokens_seen": 243083536, + "step": 3670 + }, + { + "epoch": 0.3435203818973183, + "loss": 1.3266937732696533, + "loss_ce": 0.004916415549814701, + "loss_iou": 0.5234375, + "loss_num": 0.0546875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 243083536, + "step": 3670 + }, + { + "epoch": 0.343613984181214, + "grad_norm": 63.88376235961914, + "learning_rate": 5e-05, + "loss": 1.5546, + "num_input_tokens_seen": 243149552, + "step": 3671 + }, + { + "epoch": 0.343613984181214, + "loss": 1.7038636207580566, + "loss_ce": 0.006598077714443207, + "loss_iou": 0.703125, + "loss_num": 0.05859375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 243149552, + "step": 3671 + }, + { + "epoch": 0.34370758646510974, + "grad_norm": 30.116592407226562, + "learning_rate": 5e-05, + "loss": 1.3339, + "num_input_tokens_seen": 243215392, + "step": 3672 + }, + { + "epoch": 0.34370758646510974, + "loss": 1.22581148147583, + "loss_ce": 0.007061444688588381, + "loss_iou": 0.50390625, + "loss_num": 0.041748046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 243215392, + "step": 3672 + }, + { + "epoch": 0.34380118874900545, + "grad_norm": 25.52834129333496, + "learning_rate": 5e-05, + "loss": 1.2373, + "num_input_tokens_seen": 243282692, + "step": 3673 + }, + { + "epoch": 0.34380118874900545, + "loss": 1.2232282161712646, + "loss_ce": 0.004966398235410452, + "loss_iou": 0.494140625, + "loss_num": 0.04638671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 243282692, + "step": 3673 + }, + { + "epoch": 0.3438947910329012, + "grad_norm": 35.763919830322266, + "learning_rate": 5e-05, + "loss": 1.5312, + "num_input_tokens_seen": 243349264, + "step": 3674 + }, + { + "epoch": 0.3438947910329012, + "loss": 1.647485613822937, + "loss_ce": 0.006860640831291676, + "loss_iou": 0.65234375, + "loss_num": 0.06689453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 243349264, + "step": 3674 + }, + { + "epoch": 0.3439883933167969, + "grad_norm": 30.420204162597656, + "learning_rate": 5e-05, + "loss": 1.5644, + "num_input_tokens_seen": 243415048, + "step": 3675 + }, + { + "epoch": 0.3439883933167969, + "loss": 1.7645032405853271, + "loss_ce": 0.010596971958875656, + "loss_iou": 0.71484375, + "loss_num": 0.06494140625, + "loss_xval": 1.75, + "num_input_tokens_seen": 243415048, + "step": 3675 + }, + { + "epoch": 0.34408199560069264, + "grad_norm": 18.679962158203125, + "learning_rate": 5e-05, + "loss": 1.4205, + "num_input_tokens_seen": 243480824, + "step": 3676 + }, + { + "epoch": 0.34408199560069264, + "loss": 1.3647938966751099, + "loss_ce": 0.005418871063739061, + "loss_iou": 0.52734375, + "loss_num": 0.060546875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 243480824, + "step": 3676 + }, + { + "epoch": 0.3441755978845884, + "grad_norm": 39.677101135253906, + "learning_rate": 5e-05, + "loss": 1.3917, + "num_input_tokens_seen": 243547072, + "step": 3677 + }, + { + "epoch": 0.3441755978845884, + "loss": 1.3339930772781372, + "loss_ce": 0.005135707091540098, + "loss_iou": 0.50390625, + "loss_num": 0.0654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 243547072, + "step": 3677 + }, + { + "epoch": 0.3442692001684841, + "grad_norm": 32.127342224121094, + "learning_rate": 5e-05, + "loss": 1.7895, + "num_input_tokens_seen": 243613868, + "step": 3678 + }, + { + "epoch": 0.3442692001684841, + "loss": 1.6922013759613037, + "loss_ce": 0.006654429715126753, + "loss_iou": 0.703125, + "loss_num": 0.055419921875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 243613868, + "step": 3678 + }, + { + "epoch": 0.3443628024523798, + "grad_norm": 18.287097930908203, + "learning_rate": 5e-05, + "loss": 1.4034, + "num_input_tokens_seen": 243680388, + "step": 3679 + }, + { + "epoch": 0.3443628024523798, + "loss": 1.375736951828003, + "loss_ce": 0.0046431622467935085, + "loss_iou": 0.498046875, + "loss_num": 0.07470703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 243680388, + "step": 3679 + }, + { + "epoch": 0.3444564047362756, + "grad_norm": 11.223101615905762, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 243746560, + "step": 3680 + }, + { + "epoch": 0.3444564047362756, + "loss": 1.4266433715820312, + "loss_ce": 0.0037918402813374996, + "loss_iou": 0.498046875, + "loss_num": 0.0849609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 243746560, + "step": 3680 + }, + { + "epoch": 0.3445500070201713, + "grad_norm": 146.6287384033203, + "learning_rate": 5e-05, + "loss": 1.3631, + "num_input_tokens_seen": 243812084, + "step": 3681 + }, + { + "epoch": 0.3445500070201713, + "loss": 1.281965732574463, + "loss_ce": 0.0055985464714467525, + "loss_iou": 0.515625, + "loss_num": 0.049072265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 243812084, + "step": 3681 + }, + { + "epoch": 0.344643609304067, + "grad_norm": 25.403162002563477, + "learning_rate": 5e-05, + "loss": 1.3416, + "num_input_tokens_seen": 243878352, + "step": 3682 + }, + { + "epoch": 0.344643609304067, + "loss": 1.209255576133728, + "loss_ce": 0.01052510179579258, + "loss_iou": 0.490234375, + "loss_num": 0.04345703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 243878352, + "step": 3682 + }, + { + "epoch": 0.3447372115879627, + "grad_norm": 26.909154891967773, + "learning_rate": 5e-05, + "loss": 1.1672, + "num_input_tokens_seen": 243944912, + "step": 3683 + }, + { + "epoch": 0.3447372115879627, + "loss": 1.17277193069458, + "loss_ce": 0.004314985126256943, + "loss_iou": 0.4765625, + "loss_num": 0.04296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 243944912, + "step": 3683 + }, + { + "epoch": 0.3448308138718585, + "grad_norm": 41.67969512939453, + "learning_rate": 5e-05, + "loss": 1.7167, + "num_input_tokens_seen": 244010936, + "step": 3684 + }, + { + "epoch": 0.3448308138718585, + "loss": 1.7872049808502197, + "loss_ce": 0.004978567361831665, + "loss_iou": 0.69921875, + "loss_num": 0.0771484375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 244010936, + "step": 3684 + }, + { + "epoch": 0.3449244161557542, + "grad_norm": 60.78505325317383, + "learning_rate": 5e-05, + "loss": 1.4952, + "num_input_tokens_seen": 244078052, + "step": 3685 + }, + { + "epoch": 0.3449244161557542, + "loss": 1.5322611331939697, + "loss_ce": 0.00491733755916357, + "loss_iou": 0.640625, + "loss_num": 0.04931640625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 244078052, + "step": 3685 + }, + { + "epoch": 0.3450180184396499, + "grad_norm": 15.89416217803955, + "learning_rate": 5e-05, + "loss": 1.5434, + "num_input_tokens_seen": 244144356, + "step": 3686 + }, + { + "epoch": 0.3450180184396499, + "loss": 1.550642490386963, + "loss_ce": 0.004743984434753656, + "loss_iou": 0.55859375, + "loss_num": 0.0849609375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 244144356, + "step": 3686 + }, + { + "epoch": 0.3451116207235457, + "grad_norm": 40.959346771240234, + "learning_rate": 5e-05, + "loss": 1.3323, + "num_input_tokens_seen": 244211072, + "step": 3687 + }, + { + "epoch": 0.3451116207235457, + "loss": 1.285388469696045, + "loss_ce": 0.0075564635917544365, + "loss_iou": 0.5234375, + "loss_num": 0.04638671875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 244211072, + "step": 3687 + }, + { + "epoch": 0.3452052230074414, + "grad_norm": 25.80829620361328, + "learning_rate": 5e-05, + "loss": 1.3496, + "num_input_tokens_seen": 244277672, + "step": 3688 + }, + { + "epoch": 0.3452052230074414, + "loss": 1.2949481010437012, + "loss_ce": 0.0039324769750237465, + "loss_iou": 0.515625, + "loss_num": 0.051513671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 244277672, + "step": 3688 + }, + { + "epoch": 0.3452988252913371, + "grad_norm": 26.702003479003906, + "learning_rate": 5e-05, + "loss": 1.4056, + "num_input_tokens_seen": 244343808, + "step": 3689 + }, + { + "epoch": 0.3452988252913371, + "loss": 1.5517548322677612, + "loss_ce": 0.005368147045373917, + "loss_iou": 0.6171875, + "loss_num": 0.06298828125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 244343808, + "step": 3689 + }, + { + "epoch": 0.34539242757523286, + "grad_norm": 36.517913818359375, + "learning_rate": 5e-05, + "loss": 1.7433, + "num_input_tokens_seen": 244410436, + "step": 3690 + }, + { + "epoch": 0.34539242757523286, + "loss": 1.7622140645980835, + "loss_ce": 0.00635466817766428, + "loss_iou": 0.7109375, + "loss_num": 0.06689453125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 244410436, + "step": 3690 + }, + { + "epoch": 0.3454860298591286, + "grad_norm": 22.5693302154541, + "learning_rate": 5e-05, + "loss": 1.7756, + "num_input_tokens_seen": 244477220, + "step": 3691 + }, + { + "epoch": 0.3454860298591286, + "loss": 1.552940845489502, + "loss_ce": 0.005089297890663147, + "loss_iou": 0.66015625, + "loss_num": 0.044677734375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 244477220, + "step": 3691 + }, + { + "epoch": 0.3455796321430243, + "grad_norm": 48.82529067993164, + "learning_rate": 5e-05, + "loss": 1.3523, + "num_input_tokens_seen": 244543112, + "step": 3692 + }, + { + "epoch": 0.3455796321430243, + "loss": 1.4222763776779175, + "loss_ce": 0.003819405334070325, + "loss_iou": 0.55078125, + "loss_num": 0.0625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 244543112, + "step": 3692 + }, + { + "epoch": 0.34567323442692, + "grad_norm": 20.175817489624023, + "learning_rate": 5e-05, + "loss": 1.0903, + "num_input_tokens_seen": 244609400, + "step": 3693 + }, + { + "epoch": 0.34567323442692, + "loss": 1.069154977798462, + "loss_ce": 0.007143290247768164, + "loss_iou": 0.482421875, + "loss_num": 0.019287109375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 244609400, + "step": 3693 + }, + { + "epoch": 0.34576683671081576, + "grad_norm": 20.41504669189453, + "learning_rate": 5e-05, + "loss": 1.3561, + "num_input_tokens_seen": 244675696, + "step": 3694 + }, + { + "epoch": 0.34576683671081576, + "loss": 1.2821524143218994, + "loss_ce": 0.003832231042906642, + "loss_iou": 0.5078125, + "loss_num": 0.0517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 244675696, + "step": 3694 + }, + { + "epoch": 0.34586043899471147, + "grad_norm": 46.832210540771484, + "learning_rate": 5e-05, + "loss": 1.5007, + "num_input_tokens_seen": 244741120, + "step": 3695 + }, + { + "epoch": 0.34586043899471147, + "loss": 1.5052576065063477, + "loss_ce": 0.00867561437189579, + "loss_iou": 0.486328125, + "loss_num": 0.10546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 244741120, + "step": 3695 + }, + { + "epoch": 0.3459540412786072, + "grad_norm": 24.776247024536133, + "learning_rate": 5e-05, + "loss": 1.4978, + "num_input_tokens_seen": 244807848, + "step": 3696 + }, + { + "epoch": 0.3459540412786072, + "loss": 1.6521716117858887, + "loss_ce": 0.005199096165597439, + "loss_iou": 0.68359375, + "loss_num": 0.055908203125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 244807848, + "step": 3696 + }, + { + "epoch": 0.34604764356250295, + "grad_norm": 19.971397399902344, + "learning_rate": 5e-05, + "loss": 1.3413, + "num_input_tokens_seen": 244875628, + "step": 3697 + }, + { + "epoch": 0.34604764356250295, + "loss": 1.4090442657470703, + "loss_ce": 0.007677052635699511, + "loss_iou": 0.51953125, + "loss_num": 0.07275390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 244875628, + "step": 3697 + }, + { + "epoch": 0.34614124584639866, + "grad_norm": 35.29273986816406, + "learning_rate": 5e-05, + "loss": 1.4335, + "num_input_tokens_seen": 244941636, + "step": 3698 + }, + { + "epoch": 0.34614124584639866, + "loss": 1.6018643379211426, + "loss_ce": 0.005184710957109928, + "loss_iou": 0.640625, + "loss_num": 0.0625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 244941636, + "step": 3698 + }, + { + "epoch": 0.34623484813029437, + "grad_norm": 35.30159378051758, + "learning_rate": 5e-05, + "loss": 1.3965, + "num_input_tokens_seen": 245007276, + "step": 3699 + }, + { + "epoch": 0.34623484813029437, + "loss": 1.3094220161437988, + "loss_ce": 0.005711059086024761, + "loss_iou": 0.5390625, + "loss_num": 0.044189453125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 245007276, + "step": 3699 + }, + { + "epoch": 0.3463284504141901, + "grad_norm": 20.247718811035156, + "learning_rate": 5e-05, + "loss": 1.554, + "num_input_tokens_seen": 245073048, + "step": 3700 + }, + { + "epoch": 0.3463284504141901, + "loss": 1.7371816635131836, + "loss_ce": 0.012083975598216057, + "loss_iou": 0.6484375, + "loss_num": 0.08544921875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 245073048, + "step": 3700 + }, + { + "epoch": 0.34642205269808585, + "grad_norm": 22.586915969848633, + "learning_rate": 5e-05, + "loss": 1.4198, + "num_input_tokens_seen": 245139508, + "step": 3701 + }, + { + "epoch": 0.34642205269808585, + "loss": 1.3193607330322266, + "loss_ce": 0.008325554430484772, + "loss_iou": 0.546875, + "loss_num": 0.043212890625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 245139508, + "step": 3701 + }, + { + "epoch": 0.34651565498198156, + "grad_norm": 44.17936325073242, + "learning_rate": 5e-05, + "loss": 1.4207, + "num_input_tokens_seen": 245205184, + "step": 3702 + }, + { + "epoch": 0.34651565498198156, + "loss": 1.4175338745117188, + "loss_ce": 0.006401140242815018, + "loss_iou": 0.5703125, + "loss_num": 0.0546875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 245205184, + "step": 3702 + }, + { + "epoch": 0.34660925726587727, + "grad_norm": 20.859495162963867, + "learning_rate": 5e-05, + "loss": 1.479, + "num_input_tokens_seen": 245271516, + "step": 3703 + }, + { + "epoch": 0.34660925726587727, + "loss": 1.3903142213821411, + "loss_ce": 0.005548547953367233, + "loss_iou": 0.59765625, + "loss_num": 0.03759765625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 245271516, + "step": 3703 + }, + { + "epoch": 0.34670285954977303, + "grad_norm": 10.4099702835083, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 245337784, + "step": 3704 + }, + { + "epoch": 0.34670285954977303, + "loss": 1.2686253786087036, + "loss_ce": 0.0030003655701875687, + "loss_iou": 0.53125, + "loss_num": 0.04052734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 245337784, + "step": 3704 + }, + { + "epoch": 0.34679646183366875, + "grad_norm": 23.97836685180664, + "learning_rate": 5e-05, + "loss": 1.3845, + "num_input_tokens_seen": 245404520, + "step": 3705 + }, + { + "epoch": 0.34679646183366875, + "loss": 1.360370397567749, + "loss_ce": 0.007342985365539789, + "loss_iou": 0.5625, + "loss_num": 0.04541015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 245404520, + "step": 3705 + }, + { + "epoch": 0.34689006411756446, + "grad_norm": 20.272201538085938, + "learning_rate": 5e-05, + "loss": 1.2688, + "num_input_tokens_seen": 245471380, + "step": 3706 + }, + { + "epoch": 0.34689006411756446, + "loss": 1.2851166725158691, + "loss_ce": 0.0053314100950956345, + "loss_iou": 0.5546875, + "loss_num": 0.033447265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 245471380, + "step": 3706 + }, + { + "epoch": 0.3469836664014602, + "grad_norm": 25.2825870513916, + "learning_rate": 5e-05, + "loss": 1.4306, + "num_input_tokens_seen": 245536760, + "step": 3707 + }, + { + "epoch": 0.3469836664014602, + "loss": 1.44158935546875, + "loss_ce": 0.010925253853201866, + "loss_iou": 0.58203125, + "loss_num": 0.052978515625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 245536760, + "step": 3707 + }, + { + "epoch": 0.34707726868535593, + "grad_norm": 27.773530960083008, + "learning_rate": 5e-05, + "loss": 1.3239, + "num_input_tokens_seen": 245603424, + "step": 3708 + }, + { + "epoch": 0.34707726868535593, + "loss": 1.2831923961639404, + "loss_ce": 0.008778421208262444, + "loss_iou": 0.53515625, + "loss_num": 0.04150390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 245603424, + "step": 3708 + }, + { + "epoch": 0.34717087096925164, + "grad_norm": 26.971498489379883, + "learning_rate": 5e-05, + "loss": 1.584, + "num_input_tokens_seen": 245669460, + "step": 3709 + }, + { + "epoch": 0.34717087096925164, + "loss": 1.676065444946289, + "loss_ce": 0.003702209796756506, + "loss_iou": 0.65625, + "loss_num": 0.0712890625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 245669460, + "step": 3709 + }, + { + "epoch": 0.34726447325314735, + "grad_norm": 21.093570709228516, + "learning_rate": 5e-05, + "loss": 1.1342, + "num_input_tokens_seen": 245735736, + "step": 3710 + }, + { + "epoch": 0.34726447325314735, + "loss": 1.240703821182251, + "loss_ce": 0.0073054637759923935, + "loss_iou": 0.490234375, + "loss_num": 0.050537109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 245735736, + "step": 3710 + }, + { + "epoch": 0.3473580755370431, + "grad_norm": 29.543262481689453, + "learning_rate": 5e-05, + "loss": 1.1328, + "num_input_tokens_seen": 245802308, + "step": 3711 + }, + { + "epoch": 0.3473580755370431, + "loss": 1.1575729846954346, + "loss_ce": 0.005229260306805372, + "loss_iou": 0.49609375, + "loss_num": 0.03271484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 245802308, + "step": 3711 + }, + { + "epoch": 0.34745167782093883, + "grad_norm": 27.989383697509766, + "learning_rate": 5e-05, + "loss": 1.5556, + "num_input_tokens_seen": 245867520, + "step": 3712 + }, + { + "epoch": 0.34745167782093883, + "loss": 1.6030399799346924, + "loss_ce": 0.0053838156163692474, + "loss_iou": 0.62109375, + "loss_num": 0.0712890625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 245867520, + "step": 3712 + }, + { + "epoch": 0.34754528010483454, + "grad_norm": 38.358299255371094, + "learning_rate": 5e-05, + "loss": 1.1329, + "num_input_tokens_seen": 245934080, + "step": 3713 + }, + { + "epoch": 0.34754528010483454, + "loss": 1.0540615320205688, + "loss_ce": 0.0018154431600123644, + "loss_iou": 0.46875, + "loss_num": 0.02294921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 245934080, + "step": 3713 + }, + { + "epoch": 0.3476388823887303, + "grad_norm": 30.39254379272461, + "learning_rate": 5e-05, + "loss": 1.3977, + "num_input_tokens_seen": 246000392, + "step": 3714 + }, + { + "epoch": 0.3476388823887303, + "loss": 1.2956597805023193, + "loss_ce": 0.004644259810447693, + "loss_iou": 0.53125, + "loss_num": 0.04541015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 246000392, + "step": 3714 + }, + { + "epoch": 0.347732484672626, + "grad_norm": 21.19542121887207, + "learning_rate": 5e-05, + "loss": 1.3307, + "num_input_tokens_seen": 246066356, + "step": 3715 + }, + { + "epoch": 0.347732484672626, + "loss": 1.5889265537261963, + "loss_ce": 0.0029890104196965694, + "loss_iou": 0.60546875, + "loss_num": 0.07470703125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 246066356, + "step": 3715 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 18.027751922607422, + "learning_rate": 5e-05, + "loss": 1.3525, + "num_input_tokens_seen": 246132344, + "step": 3716 + }, + { + "epoch": 0.34782608695652173, + "loss": 1.3334925174713135, + "loss_ce": 0.007808832451701164, + "loss_iou": 0.54296875, + "loss_num": 0.048828125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 246132344, + "step": 3716 + }, + { + "epoch": 0.34791968924041744, + "grad_norm": 54.89906311035156, + "learning_rate": 5e-05, + "loss": 1.3624, + "num_input_tokens_seen": 246199552, + "step": 3717 + }, + { + "epoch": 0.34791968924041744, + "loss": 1.5785040855407715, + "loss_ce": 0.005750169046223164, + "loss_iou": 0.62109375, + "loss_num": 0.0654296875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 246199552, + "step": 3717 + }, + { + "epoch": 0.3480132915243132, + "grad_norm": 30.902700424194336, + "learning_rate": 5e-05, + "loss": 1.8924, + "num_input_tokens_seen": 246265040, + "step": 3718 + }, + { + "epoch": 0.3480132915243132, + "loss": 1.7432301044464111, + "loss_ce": 0.005925359204411507, + "loss_iou": 0.703125, + "loss_num": 0.0673828125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 246265040, + "step": 3718 + }, + { + "epoch": 0.3481068938082089, + "grad_norm": 40.07839584350586, + "learning_rate": 5e-05, + "loss": 1.2299, + "num_input_tokens_seen": 246330648, + "step": 3719 + }, + { + "epoch": 0.3481068938082089, + "loss": 1.0856637954711914, + "loss_ce": 0.0046091387048363686, + "loss_iou": 0.435546875, + "loss_num": 0.041748046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 246330648, + "step": 3719 + }, + { + "epoch": 0.3482004960921046, + "grad_norm": 24.28256607055664, + "learning_rate": 5e-05, + "loss": 1.5795, + "num_input_tokens_seen": 246397028, + "step": 3720 + }, + { + "epoch": 0.3482004960921046, + "loss": 1.5197551250457764, + "loss_ce": 0.006571510806679726, + "loss_iou": 0.62890625, + "loss_num": 0.051513671875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 246397028, + "step": 3720 + }, + { + "epoch": 0.3482940983760004, + "grad_norm": 24.5905818939209, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 246463484, + "step": 3721 + }, + { + "epoch": 0.3482940983760004, + "loss": 1.5609971284866333, + "loss_ce": 0.0053330278024077415, + "loss_iou": 0.64453125, + "loss_num": 0.053466796875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 246463484, + "step": 3721 + }, + { + "epoch": 0.3483877006598961, + "grad_norm": 22.278881072998047, + "learning_rate": 5e-05, + "loss": 1.3557, + "num_input_tokens_seen": 246529304, + "step": 3722 + }, + { + "epoch": 0.3483877006598961, + "loss": 1.4038203954696655, + "loss_ce": 0.005382911302149296, + "loss_iou": 0.56640625, + "loss_num": 0.052978515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 246529304, + "step": 3722 + }, + { + "epoch": 0.3484813029437918, + "grad_norm": 62.42136001586914, + "learning_rate": 5e-05, + "loss": 1.4577, + "num_input_tokens_seen": 246595196, + "step": 3723 + }, + { + "epoch": 0.3484813029437918, + "loss": 1.490161657333374, + "loss_ce": 0.006274977698922157, + "loss_iou": 0.58203125, + "loss_num": 0.0634765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 246595196, + "step": 3723 + }, + { + "epoch": 0.3485749052276876, + "grad_norm": 21.269954681396484, + "learning_rate": 5e-05, + "loss": 1.2955, + "num_input_tokens_seen": 246660448, + "step": 3724 + }, + { + "epoch": 0.3485749052276876, + "loss": 1.2220447063446045, + "loss_ce": 0.005858277902007103, + "loss_iou": 0.47265625, + "loss_num": 0.054443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 246660448, + "step": 3724 + }, + { + "epoch": 0.3486685075115833, + "grad_norm": 20.63177490234375, + "learning_rate": 5e-05, + "loss": 1.1556, + "num_input_tokens_seen": 246726176, + "step": 3725 + }, + { + "epoch": 0.3486685075115833, + "loss": 0.934677004814148, + "loss_ce": 0.0059660375118255615, + "loss_iou": 0.3984375, + "loss_num": 0.026123046875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 246726176, + "step": 3725 + }, + { + "epoch": 0.348762109795479, + "grad_norm": 19.049415588378906, + "learning_rate": 5e-05, + "loss": 1.298, + "num_input_tokens_seen": 246792180, + "step": 3726 + }, + { + "epoch": 0.348762109795479, + "loss": 1.1643699407577515, + "loss_ce": 0.008119983598589897, + "loss_iou": 0.44140625, + "loss_num": 0.054931640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 246792180, + "step": 3726 + }, + { + "epoch": 0.3488557120793747, + "grad_norm": 27.139068603515625, + "learning_rate": 5e-05, + "loss": 1.4329, + "num_input_tokens_seen": 246857796, + "step": 3727 + }, + { + "epoch": 0.3488557120793747, + "loss": 1.4684312343597412, + "loss_ce": 0.004808175843209028, + "loss_iou": 0.58984375, + "loss_num": 0.057373046875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 246857796, + "step": 3727 + }, + { + "epoch": 0.3489493143632705, + "grad_norm": 25.338150024414062, + "learning_rate": 5e-05, + "loss": 1.3457, + "num_input_tokens_seen": 246924268, + "step": 3728 + }, + { + "epoch": 0.3489493143632705, + "loss": 1.1883865594863892, + "loss_ce": 0.006257642991840839, + "loss_iou": 0.515625, + "loss_num": 0.0303955078125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 246924268, + "step": 3728 + }, + { + "epoch": 0.3490429166471662, + "grad_norm": 27.340499877929688, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 246991528, + "step": 3729 + }, + { + "epoch": 0.3490429166471662, + "loss": 1.4734461307525635, + "loss_ce": 0.008602448739111423, + "loss_iou": 0.5859375, + "loss_num": 0.057861328125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 246991528, + "step": 3729 + }, + { + "epoch": 0.3491365189310619, + "grad_norm": 27.09548568725586, + "learning_rate": 5e-05, + "loss": 1.3197, + "num_input_tokens_seen": 247058764, + "step": 3730 + }, + { + "epoch": 0.3491365189310619, + "loss": 1.2067992687225342, + "loss_ce": 0.007092206738889217, + "loss_iou": 0.51171875, + "loss_num": 0.03515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 247058764, + "step": 3730 + }, + { + "epoch": 0.34923012121495767, + "grad_norm": 24.08138084411621, + "learning_rate": 5e-05, + "loss": 1.399, + "num_input_tokens_seen": 247125216, + "step": 3731 + }, + { + "epoch": 0.34923012121495767, + "loss": 1.2107813358306885, + "loss_ce": 0.00716799683868885, + "loss_iou": 0.490234375, + "loss_num": 0.04443359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 247125216, + "step": 3731 + }, + { + "epoch": 0.3493237234988534, + "grad_norm": 27.801660537719727, + "learning_rate": 5e-05, + "loss": 1.3599, + "num_input_tokens_seen": 247190348, + "step": 3732 + }, + { + "epoch": 0.3493237234988534, + "loss": 1.091841220855713, + "loss_ce": 0.006147854961454868, + "loss_iou": 0.42578125, + "loss_num": 0.046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 247190348, + "step": 3732 + }, + { + "epoch": 0.3494173257827491, + "grad_norm": 16.304279327392578, + "learning_rate": 5e-05, + "loss": 1.448, + "num_input_tokens_seen": 247255968, + "step": 3733 + }, + { + "epoch": 0.3494173257827491, + "loss": 1.4395084381103516, + "loss_ce": 0.005426528863608837, + "loss_iou": 0.56640625, + "loss_num": 0.0595703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 247255968, + "step": 3733 + }, + { + "epoch": 0.3495109280666448, + "grad_norm": 18.43020248413086, + "learning_rate": 5e-05, + "loss": 1.2068, + "num_input_tokens_seen": 247322588, + "step": 3734 + }, + { + "epoch": 0.3495109280666448, + "loss": 1.3476488590240479, + "loss_ce": 0.006828451529145241, + "loss_iou": 0.56640625, + "loss_num": 0.0419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 247322588, + "step": 3734 + }, + { + "epoch": 0.34960453035054057, + "grad_norm": 20.443666458129883, + "learning_rate": 5e-05, + "loss": 1.2719, + "num_input_tokens_seen": 247388704, + "step": 3735 + }, + { + "epoch": 0.34960453035054057, + "loss": 0.9696588516235352, + "loss_ce": 0.008721387013792992, + "loss_iou": 0.390625, + "loss_num": 0.035888671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 247388704, + "step": 3735 + }, + { + "epoch": 0.3496981326344363, + "grad_norm": 32.19856643676758, + "learning_rate": 5e-05, + "loss": 1.581, + "num_input_tokens_seen": 247454628, + "step": 3736 + }, + { + "epoch": 0.3496981326344363, + "loss": 1.8044781684875488, + "loss_ce": 0.010532870888710022, + "loss_iou": 0.6796875, + "loss_num": 0.0859375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 247454628, + "step": 3736 + }, + { + "epoch": 0.349791734918332, + "grad_norm": 43.47623062133789, + "learning_rate": 5e-05, + "loss": 1.4253, + "num_input_tokens_seen": 247520040, + "step": 3737 + }, + { + "epoch": 0.349791734918332, + "loss": 1.3010234832763672, + "loss_ce": 0.005125178024172783, + "loss_iou": 0.53125, + "loss_num": 0.046630859375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 247520040, + "step": 3737 + }, + { + "epoch": 0.34988533720222775, + "grad_norm": 26.866897583007812, + "learning_rate": 5e-05, + "loss": 1.4757, + "num_input_tokens_seen": 247585524, + "step": 3738 + }, + { + "epoch": 0.34988533720222775, + "loss": 1.554808497428894, + "loss_ce": 0.003050694242119789, + "loss_iou": 0.64453125, + "loss_num": 0.05322265625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 247585524, + "step": 3738 + }, + { + "epoch": 0.34997893948612346, + "grad_norm": 34.61675262451172, + "learning_rate": 5e-05, + "loss": 1.1528, + "num_input_tokens_seen": 247652392, + "step": 3739 + }, + { + "epoch": 0.34997893948612346, + "loss": 0.9877606630325317, + "loss_ce": 0.0077802203595638275, + "loss_iou": 0.400390625, + "loss_num": 0.03564453125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 247652392, + "step": 3739 + }, + { + "epoch": 0.3500725417700192, + "grad_norm": 25.95360565185547, + "learning_rate": 5e-05, + "loss": 1.3645, + "num_input_tokens_seen": 247718332, + "step": 3740 + }, + { + "epoch": 0.3500725417700192, + "loss": 1.4898865222930908, + "loss_ce": 0.005511471536010504, + "loss_iou": 0.59765625, + "loss_num": 0.057373046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 247718332, + "step": 3740 + }, + { + "epoch": 0.35016614405391494, + "grad_norm": 19.525766372680664, + "learning_rate": 5e-05, + "loss": 1.3563, + "num_input_tokens_seen": 247785160, + "step": 3741 + }, + { + "epoch": 0.35016614405391494, + "loss": 1.343273639678955, + "loss_ce": 0.005382982082664967, + "loss_iou": 0.515625, + "loss_num": 0.06103515625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 247785160, + "step": 3741 + }, + { + "epoch": 0.35025974633781065, + "grad_norm": 25.111024856567383, + "learning_rate": 5e-05, + "loss": 1.5062, + "num_input_tokens_seen": 247851032, + "step": 3742 + }, + { + "epoch": 0.35025974633781065, + "loss": 1.586619257926941, + "loss_ce": 0.008494309149682522, + "loss_iou": 0.63671875, + "loss_num": 0.061279296875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 247851032, + "step": 3742 + }, + { + "epoch": 0.35035334862170636, + "grad_norm": 34.43784713745117, + "learning_rate": 5e-05, + "loss": 1.8012, + "num_input_tokens_seen": 247917356, + "step": 3743 + }, + { + "epoch": 0.35035334862170636, + "loss": 2.063164710998535, + "loss_ce": 0.00945371575653553, + "loss_iou": 0.79296875, + "loss_num": 0.09375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 247917356, + "step": 3743 + }, + { + "epoch": 0.3504469509056021, + "grad_norm": 22.911392211914062, + "learning_rate": 5e-05, + "loss": 1.5925, + "num_input_tokens_seen": 247984328, + "step": 3744 + }, + { + "epoch": 0.3504469509056021, + "loss": 1.6110068559646606, + "loss_ce": 0.006514659151434898, + "loss_iou": 0.671875, + "loss_num": 0.052490234375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 247984328, + "step": 3744 + }, + { + "epoch": 0.35054055318949784, + "grad_norm": 15.441649436950684, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 248050976, + "step": 3745 + }, + { + "epoch": 0.35054055318949784, + "loss": 1.418440818786621, + "loss_ce": 0.004866563715040684, + "loss_iou": 0.59375, + "loss_num": 0.045654296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 248050976, + "step": 3745 + }, + { + "epoch": 0.35063415547339355, + "grad_norm": 29.87442398071289, + "learning_rate": 5e-05, + "loss": 1.3034, + "num_input_tokens_seen": 248117104, + "step": 3746 + }, + { + "epoch": 0.35063415547339355, + "loss": 1.0477105379104614, + "loss_ce": 0.0037651462480425835, + "loss_iou": 0.42578125, + "loss_num": 0.03857421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 248117104, + "step": 3746 + }, + { + "epoch": 0.35072775775728926, + "grad_norm": 69.46481323242188, + "learning_rate": 5e-05, + "loss": 1.3402, + "num_input_tokens_seen": 248182724, + "step": 3747 + }, + { + "epoch": 0.35072775775728926, + "loss": 1.389664888381958, + "loss_ce": 0.002946228487417102, + "loss_iou": 0.5546875, + "loss_num": 0.055419921875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 248182724, + "step": 3747 + }, + { + "epoch": 0.350821360041185, + "grad_norm": 37.87159729003906, + "learning_rate": 5e-05, + "loss": 1.5612, + "num_input_tokens_seen": 248249392, + "step": 3748 + }, + { + "epoch": 0.350821360041185, + "loss": 1.309908390045166, + "loss_ce": 0.007174026221036911, + "loss_iou": 0.58984375, + "loss_num": 0.025146484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 248249392, + "step": 3748 + }, + { + "epoch": 0.35091496232508074, + "grad_norm": 14.190786361694336, + "learning_rate": 5e-05, + "loss": 1.4599, + "num_input_tokens_seen": 248316060, + "step": 3749 + }, + { + "epoch": 0.35091496232508074, + "loss": 1.6260002851486206, + "loss_ce": 0.005883119069039822, + "loss_iou": 0.59375, + "loss_num": 0.0859375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 248316060, + "step": 3749 + }, + { + "epoch": 0.35100856460897645, + "grad_norm": 17.839017868041992, + "learning_rate": 5e-05, + "loss": 1.1132, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "eval_seeclick_CIoU": 0.1675967127084732, + "eval_seeclick_GIoU": 0.18281087279319763, + "eval_seeclick_IoU": 0.2830906957387924, + "eval_seeclick_MAE_all": 0.17341461777687073, + "eval_seeclick_MAE_h": 0.09848669171333313, + "eval_seeclick_MAE_w": 0.12923284247517586, + "eval_seeclick_MAE_x_boxes": 0.2607213482260704, + "eval_seeclick_MAE_y_boxes": 0.12332096695899963, + "eval_seeclick_NUM_probability": 0.9999575316905975, + "eval_seeclick_inside_bbox": 0.38750000298023224, + "eval_seeclick_loss": 2.4699790477752686, + "eval_seeclick_loss_ce": 0.013367105275392532, + "eval_seeclick_loss_iou": 0.8355712890625, + "eval_seeclick_loss_num": 0.1644439697265625, + "eval_seeclick_loss_xval": 2.49267578125, + "eval_seeclick_runtime": 68.9088, + "eval_seeclick_samples_per_second": 0.682, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "eval_icons_CIoU": -0.1398204267024994, + "eval_icons_GIoU": 0.004120431374758482, + "eval_icons_IoU": 0.06718008033931255, + "eval_icons_MAE_all": 0.21449340134859085, + "eval_icons_MAE_h": 0.2233874425292015, + "eval_icons_MAE_w": 0.19416968524456024, + "eval_icons_MAE_x_boxes": 0.14719068259000778, + "eval_icons_MAE_y_boxes": 0.09624434635043144, + "eval_icons_NUM_probability": 0.9998857080936432, + "eval_icons_inside_bbox": 0.1336805559694767, + "eval_icons_loss": 3.072598934173584, + "eval_icons_loss_ce": 0.00033260000054724514, + "eval_icons_loss_iou": 0.988037109375, + "eval_icons_loss_num": 0.214599609375, + "eval_icons_loss_xval": 3.0498046875, + "eval_icons_runtime": 70.9789, + "eval_icons_samples_per_second": 0.704, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "eval_screenspot_CIoU": -0.01793273165822029, + "eval_screenspot_GIoU": 0.0311531195572267, + "eval_screenspot_IoU": 0.16112064321835837, + "eval_screenspot_MAE_all": 0.21388815840085348, + "eval_screenspot_MAE_h": 0.18546390533447266, + "eval_screenspot_MAE_w": 0.19503758351008096, + "eval_screenspot_MAE_x_boxes": 0.27813207109769184, + "eval_screenspot_MAE_y_boxes": 0.10236301769812901, + "eval_screenspot_NUM_probability": 0.9999507665634155, + "eval_screenspot_inside_bbox": 0.2912500003973643, + "eval_screenspot_loss": 3.0125794410705566, + "eval_screenspot_loss_ce": 0.011606858111917973, + "eval_screenspot_loss_iou": 0.98046875, + "eval_screenspot_loss_num": 0.22271728515625, + "eval_screenspot_loss_xval": 3.0732421875, + "eval_screenspot_runtime": 119.8004, + "eval_screenspot_samples_per_second": 0.743, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "eval_compot_CIoU": -0.10679537430405617, + "eval_compot_GIoU": -0.010890580713748932, + "eval_compot_IoU": 0.09891067445278168, + "eval_compot_MAE_all": 0.2528786063194275, + "eval_compot_MAE_h": 0.236269049346447, + "eval_compot_MAE_w": 0.34164193272590637, + "eval_compot_MAE_x_boxes": 0.14958462119102478, + "eval_compot_MAE_y_boxes": 0.08694823086261749, + "eval_compot_NUM_probability": 0.9998629093170166, + "eval_compot_inside_bbox": 0.1892361119389534, + "eval_compot_loss": 3.3435428142547607, + "eval_compot_loss_ce": 0.008334077894687653, + "eval_compot_loss_iou": 1.054931640625, + "eval_compot_loss_num": 0.265899658203125, + "eval_compot_loss_xval": 3.4365234375, + "eval_compot_runtime": 72.0474, + "eval_compot_samples_per_second": 0.694, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "eval_custom_ui_MAE_all": 0.17373321950435638, + "eval_custom_ui_MAE_x": 0.20427002012729645, + "eval_custom_ui_MAE_y": 0.14319640398025513, + "eval_custom_ui_NUM_probability": 0.9999723434448242, + "eval_custom_ui_loss": 1.0378717184066772, + "eval_custom_ui_loss_ce": 0.21781423687934875, + "eval_custom_ui_loss_num": 0.170257568359375, + "eval_custom_ui_loss_xval": 0.850830078125, + "eval_custom_ui_runtime": 53.808, + "eval_custom_ui_samples_per_second": 0.929, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35100856460897645, + "loss": 1.0204535722732544, + "loss_ce": 0.2328559309244156, + "loss_iou": 0.0, + "loss_num": 0.1572265625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 248381900, + "step": 3750 + }, + { + "epoch": 0.35110216689287216, + "grad_norm": 28.215551376342773, + "learning_rate": 5e-05, + "loss": 1.3363, + "num_input_tokens_seen": 248448948, + "step": 3751 + }, + { + "epoch": 0.35110216689287216, + "loss": 1.2733601331710815, + "loss_ce": 0.008223352022469044, + "loss_iou": 0.48828125, + "loss_num": 0.05810546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 248448948, + "step": 3751 + }, + { + "epoch": 0.3511957691767679, + "grad_norm": 59.027870178222656, + "learning_rate": 5e-05, + "loss": 1.3765, + "num_input_tokens_seen": 248516020, + "step": 3752 + }, + { + "epoch": 0.3511957691767679, + "loss": 1.4210923910140991, + "loss_ce": 0.008982954546809196, + "loss_iou": 0.59765625, + "loss_num": 0.04296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 248516020, + "step": 3752 + }, + { + "epoch": 0.35128937146066364, + "grad_norm": 60.99232482910156, + "learning_rate": 5e-05, + "loss": 1.542, + "num_input_tokens_seen": 248581520, + "step": 3753 + }, + { + "epoch": 0.35128937146066364, + "loss": 1.66731858253479, + "loss_ce": 0.0037444327026605606, + "loss_iou": 0.65625, + "loss_num": 0.06982421875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 248581520, + "step": 3753 + }, + { + "epoch": 0.35138297374455935, + "grad_norm": 30.35032844543457, + "learning_rate": 5e-05, + "loss": 1.4345, + "num_input_tokens_seen": 248648016, + "step": 3754 + }, + { + "epoch": 0.35138297374455935, + "loss": 1.4922391176223755, + "loss_ce": 0.002981330268085003, + "loss_iou": 0.56640625, + "loss_num": 0.07177734375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 248648016, + "step": 3754 + }, + { + "epoch": 0.3514765760284551, + "grad_norm": 38.929969787597656, + "learning_rate": 5e-05, + "loss": 1.414, + "num_input_tokens_seen": 248714624, + "step": 3755 + }, + { + "epoch": 0.3514765760284551, + "loss": 1.5615707635879517, + "loss_ce": 0.010789508000016212, + "loss_iou": 0.6171875, + "loss_num": 0.06396484375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 248714624, + "step": 3755 + }, + { + "epoch": 0.3515701783123508, + "grad_norm": 21.145841598510742, + "learning_rate": 5e-05, + "loss": 1.6155, + "num_input_tokens_seen": 248780872, + "step": 3756 + }, + { + "epoch": 0.3515701783123508, + "loss": 1.6254315376281738, + "loss_ce": 0.008244091644883156, + "loss_iou": 0.68359375, + "loss_num": 0.05029296875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 248780872, + "step": 3756 + }, + { + "epoch": 0.35166378059624653, + "grad_norm": 18.661279678344727, + "learning_rate": 5e-05, + "loss": 1.3232, + "num_input_tokens_seen": 248847868, + "step": 3757 + }, + { + "epoch": 0.35166378059624653, + "loss": 1.2235629558563232, + "loss_ce": 0.0033480715937912464, + "loss_iou": 0.51171875, + "loss_num": 0.03857421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 248847868, + "step": 3757 + }, + { + "epoch": 0.3517573828801423, + "grad_norm": 25.271100997924805, + "learning_rate": 5e-05, + "loss": 1.2365, + "num_input_tokens_seen": 248914544, + "step": 3758 + }, + { + "epoch": 0.3517573828801423, + "loss": 1.090301513671875, + "loss_ce": 0.004364030435681343, + "loss_iou": 0.486328125, + "loss_num": 0.0224609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 248914544, + "step": 3758 + }, + { + "epoch": 0.351850985164038, + "grad_norm": 24.748661041259766, + "learning_rate": 5e-05, + "loss": 1.3842, + "num_input_tokens_seen": 248981016, + "step": 3759 + }, + { + "epoch": 0.351850985164038, + "loss": 1.4900479316711426, + "loss_ce": 0.006649418734014034, + "loss_iou": 0.60546875, + "loss_num": 0.055419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 248981016, + "step": 3759 + }, + { + "epoch": 0.3519445874479337, + "grad_norm": 22.86206817626953, + "learning_rate": 5e-05, + "loss": 1.3847, + "num_input_tokens_seen": 249048088, + "step": 3760 + }, + { + "epoch": 0.3519445874479337, + "loss": 1.5376489162445068, + "loss_ce": 0.007375461980700493, + "loss_iou": 0.6328125, + "loss_num": 0.052978515625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 249048088, + "step": 3760 + }, + { + "epoch": 0.35203818973182943, + "grad_norm": 34.3881721496582, + "learning_rate": 5e-05, + "loss": 1.2414, + "num_input_tokens_seen": 249113680, + "step": 3761 + }, + { + "epoch": 0.35203818973182943, + "loss": 1.1507374048233032, + "loss_ce": 0.008891724050045013, + "loss_iou": 0.458984375, + "loss_num": 0.044677734375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 249113680, + "step": 3761 + }, + { + "epoch": 0.3521317920157252, + "grad_norm": 30.287397384643555, + "learning_rate": 5e-05, + "loss": 1.6355, + "num_input_tokens_seen": 249179744, + "step": 3762 + }, + { + "epoch": 0.3521317920157252, + "loss": 1.8284342288970947, + "loss_ce": 0.003238811856135726, + "loss_iou": 0.73046875, + "loss_num": 0.0732421875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 249179744, + "step": 3762 + }, + { + "epoch": 0.3522253942996209, + "grad_norm": 21.533756256103516, + "learning_rate": 5e-05, + "loss": 1.3172, + "num_input_tokens_seen": 249246988, + "step": 3763 + }, + { + "epoch": 0.3522253942996209, + "loss": 1.338017463684082, + "loss_ce": 0.004033091012388468, + "loss_iou": 0.54296875, + "loss_num": 0.04931640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 249246988, + "step": 3763 + }, + { + "epoch": 0.3523189965835166, + "grad_norm": 28.634693145751953, + "learning_rate": 5e-05, + "loss": 1.2778, + "num_input_tokens_seen": 249312740, + "step": 3764 + }, + { + "epoch": 0.3523189965835166, + "loss": 1.2920770645141602, + "loss_ce": 0.006432513706386089, + "loss_iou": 0.45703125, + "loss_num": 0.07373046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 249312740, + "step": 3764 + }, + { + "epoch": 0.3524125988674124, + "grad_norm": 34.45309066772461, + "learning_rate": 5e-05, + "loss": 1.4943, + "num_input_tokens_seen": 249379832, + "step": 3765 + }, + { + "epoch": 0.3524125988674124, + "loss": 1.5002496242523193, + "loss_ce": 0.005132400430738926, + "loss_iou": 0.640625, + "loss_num": 0.04345703125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 249379832, + "step": 3765 + }, + { + "epoch": 0.3525062011513081, + "grad_norm": 52.35409164428711, + "learning_rate": 5e-05, + "loss": 1.0973, + "num_input_tokens_seen": 249445796, + "step": 3766 + }, + { + "epoch": 0.3525062011513081, + "loss": 0.9985809326171875, + "loss_ce": 0.004928619600832462, + "loss_iou": 0.40625, + "loss_num": 0.035888671875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 249445796, + "step": 3766 + }, + { + "epoch": 0.3525998034352038, + "grad_norm": 65.1169204711914, + "learning_rate": 5e-05, + "loss": 1.261, + "num_input_tokens_seen": 249511664, + "step": 3767 + }, + { + "epoch": 0.3525998034352038, + "loss": 1.1969112157821655, + "loss_ce": 0.00550497230142355, + "loss_iou": 0.466796875, + "loss_num": 0.052001953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 249511664, + "step": 3767 + }, + { + "epoch": 0.3526934057190996, + "grad_norm": 31.02557945251465, + "learning_rate": 5e-05, + "loss": 1.4004, + "num_input_tokens_seen": 249577608, + "step": 3768 + }, + { + "epoch": 0.3526934057190996, + "loss": 1.3097755908966064, + "loss_ce": 0.0036232187412679195, + "loss_iou": 0.52734375, + "loss_num": 0.0498046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 249577608, + "step": 3768 + }, + { + "epoch": 0.3527870080029953, + "grad_norm": 26.30963897705078, + "learning_rate": 5e-05, + "loss": 1.4664, + "num_input_tokens_seen": 249642692, + "step": 3769 + }, + { + "epoch": 0.3527870080029953, + "loss": 1.4610590934753418, + "loss_ce": 0.008544469252228737, + "loss_iou": 0.5625, + "loss_num": 0.06494140625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 249642692, + "step": 3769 + }, + { + "epoch": 0.352880610286891, + "grad_norm": 12.026778221130371, + "learning_rate": 5e-05, + "loss": 1.1388, + "num_input_tokens_seen": 249708692, + "step": 3770 + }, + { + "epoch": 0.352880610286891, + "loss": 1.216174602508545, + "loss_ce": 0.0038942997343838215, + "loss_iou": 0.482421875, + "loss_num": 0.048828125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 249708692, + "step": 3770 + }, + { + "epoch": 0.3529742125707867, + "grad_norm": 20.33180046081543, + "learning_rate": 5e-05, + "loss": 1.1762, + "num_input_tokens_seen": 249775016, + "step": 3771 + }, + { + "epoch": 0.3529742125707867, + "loss": 1.1421606540679932, + "loss_ce": 0.005075741559267044, + "loss_iou": 0.478515625, + "loss_num": 0.035888671875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 249775016, + "step": 3771 + }, + { + "epoch": 0.35306781485468247, + "grad_norm": 22.935237884521484, + "learning_rate": 5e-05, + "loss": 1.3296, + "num_input_tokens_seen": 249841488, + "step": 3772 + }, + { + "epoch": 0.35306781485468247, + "loss": 1.2075347900390625, + "loss_ce": 0.004898104816675186, + "loss_iou": 0.486328125, + "loss_num": 0.045654296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 249841488, + "step": 3772 + }, + { + "epoch": 0.3531614171385782, + "grad_norm": 29.02605438232422, + "learning_rate": 5e-05, + "loss": 1.355, + "num_input_tokens_seen": 249908660, + "step": 3773 + }, + { + "epoch": 0.3531614171385782, + "loss": 1.3004937171936035, + "loss_ce": 0.0036187791265547276, + "loss_iou": 0.515625, + "loss_num": 0.052978515625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 249908660, + "step": 3773 + }, + { + "epoch": 0.3532550194224739, + "grad_norm": 38.25444030761719, + "learning_rate": 5e-05, + "loss": 1.466, + "num_input_tokens_seen": 249973572, + "step": 3774 + }, + { + "epoch": 0.3532550194224739, + "loss": 1.4684109687805176, + "loss_ce": 0.008938336744904518, + "loss_iou": 0.59375, + "loss_num": 0.05517578125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 249973572, + "step": 3774 + }, + { + "epoch": 0.35334862170636966, + "grad_norm": 21.02090835571289, + "learning_rate": 5e-05, + "loss": 1.456, + "num_input_tokens_seen": 250039832, + "step": 3775 + }, + { + "epoch": 0.35334862170636966, + "loss": 1.5842968225479126, + "loss_ce": 0.011298801749944687, + "loss_iou": 0.60546875, + "loss_num": 0.072265625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 250039832, + "step": 3775 + }, + { + "epoch": 0.35344222399026537, + "grad_norm": 15.860291481018066, + "learning_rate": 5e-05, + "loss": 1.3043, + "num_input_tokens_seen": 250105992, + "step": 3776 + }, + { + "epoch": 0.35344222399026537, + "loss": 1.246699571609497, + "loss_ce": 0.005000336095690727, + "loss_iou": 0.51953125, + "loss_num": 0.04150390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 250105992, + "step": 3776 + }, + { + "epoch": 0.3535358262741611, + "grad_norm": 28.231550216674805, + "learning_rate": 5e-05, + "loss": 1.3316, + "num_input_tokens_seen": 250172276, + "step": 3777 + }, + { + "epoch": 0.3535358262741611, + "loss": 1.4352359771728516, + "loss_ce": 0.004571851342916489, + "loss_iou": 0.57421875, + "loss_num": 0.056640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 250172276, + "step": 3777 + }, + { + "epoch": 0.3536294285580568, + "grad_norm": 20.316654205322266, + "learning_rate": 5e-05, + "loss": 1.1808, + "num_input_tokens_seen": 250239324, + "step": 3778 + }, + { + "epoch": 0.3536294285580568, + "loss": 1.1808514595031738, + "loss_ce": 0.005314263980835676, + "loss_iou": 0.482421875, + "loss_num": 0.0419921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 250239324, + "step": 3778 + }, + { + "epoch": 0.35372303084195256, + "grad_norm": 26.929428100585938, + "learning_rate": 5e-05, + "loss": 1.3109, + "num_input_tokens_seen": 250305160, + "step": 3779 + }, + { + "epoch": 0.35372303084195256, + "loss": 1.23021399974823, + "loss_ce": 0.006092979572713375, + "loss_iou": 0.5, + "loss_num": 0.04443359375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 250305160, + "step": 3779 + }, + { + "epoch": 0.35381663312584827, + "grad_norm": 39.06914138793945, + "learning_rate": 5e-05, + "loss": 1.4644, + "num_input_tokens_seen": 250371812, + "step": 3780 + }, + { + "epoch": 0.35381663312584827, + "loss": 1.6360867023468018, + "loss_ce": 0.007668718695640564, + "loss_iou": 0.65234375, + "loss_num": 0.064453125, + "loss_xval": 1.625, + "num_input_tokens_seen": 250371812, + "step": 3780 + }, + { + "epoch": 0.353910235409744, + "grad_norm": 42.50438690185547, + "learning_rate": 5e-05, + "loss": 1.5819, + "num_input_tokens_seen": 250439176, + "step": 3781 + }, + { + "epoch": 0.353910235409744, + "loss": 1.6286696195602417, + "loss_ce": 0.007575837429612875, + "loss_iou": 0.671875, + "loss_num": 0.0556640625, + "loss_xval": 1.625, + "num_input_tokens_seen": 250439176, + "step": 3781 + }, + { + "epoch": 0.35400383769363974, + "grad_norm": 140.99368286132812, + "learning_rate": 5e-05, + "loss": 1.1349, + "num_input_tokens_seen": 250504520, + "step": 3782 + }, + { + "epoch": 0.35400383769363974, + "loss": 1.1769592761993408, + "loss_ce": 0.0036194026470184326, + "loss_iou": 0.48046875, + "loss_num": 0.042724609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 250504520, + "step": 3782 + }, + { + "epoch": 0.35409743997753546, + "grad_norm": 23.02762794494629, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 250570640, + "step": 3783 + }, + { + "epoch": 0.35409743997753546, + "loss": 1.5634300708770752, + "loss_ce": 0.011184044182300568, + "loss_iou": 0.58984375, + "loss_num": 0.07421875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 250570640, + "step": 3783 + }, + { + "epoch": 0.35419104226143117, + "grad_norm": 18.30438232421875, + "learning_rate": 5e-05, + "loss": 1.1609, + "num_input_tokens_seen": 250635644, + "step": 3784 + }, + { + "epoch": 0.35419104226143117, + "loss": 1.2793399095535278, + "loss_ce": 0.004925830289721489, + "loss_iou": 0.55078125, + "loss_num": 0.03515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 250635644, + "step": 3784 + }, + { + "epoch": 0.35428464454532693, + "grad_norm": 30.881805419921875, + "learning_rate": 5e-05, + "loss": 1.3808, + "num_input_tokens_seen": 250701120, + "step": 3785 + }, + { + "epoch": 0.35428464454532693, + "loss": 1.1811391115188599, + "loss_ce": 0.005846105050295591, + "loss_iou": 0.48828125, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 250701120, + "step": 3785 + }, + { + "epoch": 0.35437824682922264, + "grad_norm": 32.559993743896484, + "learning_rate": 5e-05, + "loss": 1.2272, + "num_input_tokens_seen": 250767312, + "step": 3786 + }, + { + "epoch": 0.35437824682922264, + "loss": 1.3315820693969727, + "loss_ce": 0.0073633925057947636, + "loss_iou": 0.5703125, + "loss_num": 0.03662109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 250767312, + "step": 3786 + }, + { + "epoch": 0.35447184911311835, + "grad_norm": 26.60196876525879, + "learning_rate": 5e-05, + "loss": 1.717, + "num_input_tokens_seen": 250832468, + "step": 3787 + }, + { + "epoch": 0.35447184911311835, + "loss": 1.5778484344482422, + "loss_ce": 0.008512407541275024, + "loss_iou": 0.58203125, + "loss_num": 0.08154296875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 250832468, + "step": 3787 + }, + { + "epoch": 0.35456545139701406, + "grad_norm": 22.825754165649414, + "learning_rate": 5e-05, + "loss": 1.2415, + "num_input_tokens_seen": 250899164, + "step": 3788 + }, + { + "epoch": 0.35456545139701406, + "loss": 1.3567886352539062, + "loss_ce": 0.006202704273164272, + "loss_iou": 0.546875, + "loss_num": 0.052001953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 250899164, + "step": 3788 + }, + { + "epoch": 0.35465905368090983, + "grad_norm": 28.716413497924805, + "learning_rate": 5e-05, + "loss": 1.3179, + "num_input_tokens_seen": 250964908, + "step": 3789 + }, + { + "epoch": 0.35465905368090983, + "loss": 1.4335330724716187, + "loss_ce": 0.004822130315005779, + "loss_iou": 0.5234375, + "loss_num": 0.076171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 250964908, + "step": 3789 + }, + { + "epoch": 0.35475265596480554, + "grad_norm": 35.66719055175781, + "learning_rate": 5e-05, + "loss": 1.7455, + "num_input_tokens_seen": 251032036, + "step": 3790 + }, + { + "epoch": 0.35475265596480554, + "loss": 1.7090213298797607, + "loss_ce": 0.004431548062711954, + "loss_iou": 0.63671875, + "loss_num": 0.0859375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 251032036, + "step": 3790 + }, + { + "epoch": 0.35484625824870125, + "grad_norm": 60.46017074584961, + "learning_rate": 5e-05, + "loss": 1.7746, + "num_input_tokens_seen": 251098572, + "step": 3791 + }, + { + "epoch": 0.35484625824870125, + "loss": 2.071401357650757, + "loss_ce": 0.006948351860046387, + "loss_iou": 0.79296875, + "loss_num": 0.0966796875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 251098572, + "step": 3791 + }, + { + "epoch": 0.354939860532597, + "grad_norm": 17.12310028076172, + "learning_rate": 5e-05, + "loss": 1.2025, + "num_input_tokens_seen": 251163432, + "step": 3792 + }, + { + "epoch": 0.354939860532597, + "loss": 1.31663978099823, + "loss_ce": 0.008046085014939308, + "loss_iou": 0.46484375, + "loss_num": 0.0751953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 251163432, + "step": 3792 + }, + { + "epoch": 0.35503346281649273, + "grad_norm": 87.58589172363281, + "learning_rate": 5e-05, + "loss": 1.3667, + "num_input_tokens_seen": 251229820, + "step": 3793 + }, + { + "epoch": 0.35503346281649273, + "loss": 1.3160040378570557, + "loss_ce": 0.0039922627620399, + "loss_iou": 0.55078125, + "loss_num": 0.04150390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 251229820, + "step": 3793 + }, + { + "epoch": 0.35512706510038844, + "grad_norm": 20.974403381347656, + "learning_rate": 5e-05, + "loss": 1.3826, + "num_input_tokens_seen": 251295944, + "step": 3794 + }, + { + "epoch": 0.35512706510038844, + "loss": 1.4396576881408691, + "loss_ce": 0.006552317179739475, + "loss_iou": 0.5625, + "loss_num": 0.0625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 251295944, + "step": 3794 + }, + { + "epoch": 0.35522066738428415, + "grad_norm": 25.305522918701172, + "learning_rate": 5e-05, + "loss": 1.295, + "num_input_tokens_seen": 251361376, + "step": 3795 + }, + { + "epoch": 0.35522066738428415, + "loss": 1.5007115602493286, + "loss_ce": 0.005594349466264248, + "loss_iou": 0.625, + "loss_num": 0.04931640625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 251361376, + "step": 3795 + }, + { + "epoch": 0.3553142696681799, + "grad_norm": 33.90884780883789, + "learning_rate": 5e-05, + "loss": 1.2415, + "num_input_tokens_seen": 251426768, + "step": 3796 + }, + { + "epoch": 0.3553142696681799, + "loss": 1.0307388305664062, + "loss_ce": 0.0036392416805028915, + "loss_iou": 0.408203125, + "loss_num": 0.0419921875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 251426768, + "step": 3796 + }, + { + "epoch": 0.3554078719520756, + "grad_norm": 21.46078109741211, + "learning_rate": 5e-05, + "loss": 1.6657, + "num_input_tokens_seen": 251493444, + "step": 3797 + }, + { + "epoch": 0.3554078719520756, + "loss": 1.8991409540176392, + "loss_ce": 0.007539353333413601, + "loss_iou": 0.7734375, + "loss_num": 0.06982421875, + "loss_xval": 1.890625, + "num_input_tokens_seen": 251493444, + "step": 3797 + }, + { + "epoch": 0.35550147423597134, + "grad_norm": 41.06492614746094, + "learning_rate": 5e-05, + "loss": 1.3286, + "num_input_tokens_seen": 251559548, + "step": 3798 + }, + { + "epoch": 0.35550147423597134, + "loss": 1.3790137767791748, + "loss_ce": 0.009018702432513237, + "loss_iou": 0.53125, + "loss_num": 0.061279296875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 251559548, + "step": 3798 + }, + { + "epoch": 0.3555950765198671, + "grad_norm": 18.932714462280273, + "learning_rate": 5e-05, + "loss": 1.2403, + "num_input_tokens_seen": 251626720, + "step": 3799 + }, + { + "epoch": 0.3555950765198671, + "loss": 1.4121257066726685, + "loss_ce": 0.004899176768958569, + "loss_iou": 0.5625, + "loss_num": 0.057373046875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 251626720, + "step": 3799 + }, + { + "epoch": 0.3556886788037628, + "grad_norm": 28.769277572631836, + "learning_rate": 5e-05, + "loss": 1.4383, + "num_input_tokens_seen": 251693384, + "step": 3800 + }, + { + "epoch": 0.3556886788037628, + "loss": 1.4223759174346924, + "loss_ce": 0.006360397674143314, + "loss_iou": 0.6328125, + "loss_num": 0.0299072265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 251693384, + "step": 3800 + }, + { + "epoch": 0.3557822810876585, + "grad_norm": 38.383880615234375, + "learning_rate": 5e-05, + "loss": 1.4703, + "num_input_tokens_seen": 251759756, + "step": 3801 + }, + { + "epoch": 0.3557822810876585, + "loss": 1.507293462753296, + "loss_ce": 0.008514214307069778, + "loss_iou": 0.59375, + "loss_num": 0.0625, + "loss_xval": 1.5, + "num_input_tokens_seen": 251759756, + "step": 3801 + }, + { + "epoch": 0.3558758833715543, + "grad_norm": 64.96527099609375, + "learning_rate": 5e-05, + "loss": 1.5875, + "num_input_tokens_seen": 251826760, + "step": 3802 + }, + { + "epoch": 0.3558758833715543, + "loss": 1.6287994384765625, + "loss_ce": 0.005264241714030504, + "loss_iou": 0.69140625, + "loss_num": 0.048828125, + "loss_xval": 1.625, + "num_input_tokens_seen": 251826760, + "step": 3802 + }, + { + "epoch": 0.35596948565545, + "grad_norm": 16.524049758911133, + "learning_rate": 5e-05, + "loss": 1.2465, + "num_input_tokens_seen": 251893856, + "step": 3803 + }, + { + "epoch": 0.35596948565545, + "loss": 1.3341472148895264, + "loss_ce": 0.009928441606462002, + "loss_iou": 0.5625, + "loss_num": 0.04052734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 251893856, + "step": 3803 + }, + { + "epoch": 0.3560630879393457, + "grad_norm": 28.4807186126709, + "learning_rate": 5e-05, + "loss": 1.4655, + "num_input_tokens_seen": 251960148, + "step": 3804 + }, + { + "epoch": 0.3560630879393457, + "loss": 1.4297295808792114, + "loss_ce": 0.012249134480953217, + "loss_iou": 0.53515625, + "loss_num": 0.0693359375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 251960148, + "step": 3804 + }, + { + "epoch": 0.3561566902232414, + "grad_norm": 42.3966064453125, + "learning_rate": 5e-05, + "loss": 1.5877, + "num_input_tokens_seen": 252024984, + "step": 3805 + }, + { + "epoch": 0.3561566902232414, + "loss": 1.5378053188323975, + "loss_ce": 0.004602145403623581, + "loss_iou": 0.61328125, + "loss_num": 0.060546875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 252024984, + "step": 3805 + }, + { + "epoch": 0.3562502925071372, + "grad_norm": 22.70017433166504, + "learning_rate": 5e-05, + "loss": 1.7721, + "num_input_tokens_seen": 252089500, + "step": 3806 + }, + { + "epoch": 0.3562502925071372, + "loss": 1.7175476551055908, + "loss_ce": 0.004656947683542967, + "loss_iou": 0.7265625, + "loss_num": 0.0517578125, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 252089500, + "step": 3806 + }, + { + "epoch": 0.3563438947910329, + "grad_norm": 19.666152954101562, + "learning_rate": 5e-05, + "loss": 1.1383, + "num_input_tokens_seen": 252156364, + "step": 3807 + }, + { + "epoch": 0.3563438947910329, + "loss": 1.2264556884765625, + "loss_ce": 0.0042877038940787315, + "loss_iou": 0.515625, + "loss_num": 0.037841796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 252156364, + "step": 3807 + }, + { + "epoch": 0.3564374970749286, + "grad_norm": 45.91120910644531, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 252222760, + "step": 3808 + }, + { + "epoch": 0.3564374970749286, + "loss": 0.9723122715950012, + "loss_ce": 0.006980276666581631, + "loss_iou": 0.435546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 252222760, + "step": 3808 + }, + { + "epoch": 0.3565310993588244, + "grad_norm": 24.510021209716797, + "learning_rate": 5e-05, + "loss": 1.5332, + "num_input_tokens_seen": 252289256, + "step": 3809 + }, + { + "epoch": 0.3565310993588244, + "loss": 1.5503010749816895, + "loss_ce": 0.0034260577522218227, + "loss_iou": 0.58203125, + "loss_num": 0.07666015625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 252289256, + "step": 3809 + }, + { + "epoch": 0.3566247016427201, + "grad_norm": 21.58144187927246, + "learning_rate": 5e-05, + "loss": 1.2941, + "num_input_tokens_seen": 252356080, + "step": 3810 + }, + { + "epoch": 0.3566247016427201, + "loss": 1.3713481426239014, + "loss_ce": 0.0036723411176353693, + "loss_iou": 0.57421875, + "loss_num": 0.04443359375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 252356080, + "step": 3810 + }, + { + "epoch": 0.3567183039266158, + "grad_norm": 33.67427062988281, + "learning_rate": 5e-05, + "loss": 1.566, + "num_input_tokens_seen": 252422348, + "step": 3811 + }, + { + "epoch": 0.3567183039266158, + "loss": 1.4817874431610107, + "loss_ce": 0.009131135419011116, + "loss_iou": 0.54296875, + "loss_num": 0.0771484375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 252422348, + "step": 3811 + }, + { + "epoch": 0.3568119062105115, + "grad_norm": 25.521656036376953, + "learning_rate": 5e-05, + "loss": 1.3677, + "num_input_tokens_seen": 252488916, + "step": 3812 + }, + { + "epoch": 0.3568119062105115, + "loss": 1.4877207279205322, + "loss_ce": 0.0067636920139193535, + "loss_iou": 0.61328125, + "loss_num": 0.05126953125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 252488916, + "step": 3812 + }, + { + "epoch": 0.3569055084944073, + "grad_norm": 31.854557037353516, + "learning_rate": 5e-05, + "loss": 1.3124, + "num_input_tokens_seen": 252556832, + "step": 3813 + }, + { + "epoch": 0.3569055084944073, + "loss": 1.2551075220108032, + "loss_ce": 0.0031543918885290623, + "loss_iou": 0.54296875, + "loss_num": 0.032470703125, + "loss_xval": 1.25, + "num_input_tokens_seen": 252556832, + "step": 3813 + }, + { + "epoch": 0.356999110778303, + "grad_norm": 26.8541316986084, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 252623132, + "step": 3814 + }, + { + "epoch": 0.356999110778303, + "loss": 1.3905422687530518, + "loss_ce": 0.004800091963261366, + "loss_iou": 0.55078125, + "loss_num": 0.057373046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 252623132, + "step": 3814 + }, + { + "epoch": 0.3570927130621987, + "grad_norm": 42.99372100830078, + "learning_rate": 5e-05, + "loss": 1.4607, + "num_input_tokens_seen": 252689232, + "step": 3815 + }, + { + "epoch": 0.3570927130621987, + "loss": 1.4563833475112915, + "loss_ce": 0.007408723700791597, + "loss_iou": 0.58984375, + "loss_num": 0.052978515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 252689232, + "step": 3815 + }, + { + "epoch": 0.35718631534609446, + "grad_norm": 35.9421272277832, + "learning_rate": 5e-05, + "loss": 1.2842, + "num_input_tokens_seen": 252755568, + "step": 3816 + }, + { + "epoch": 0.35718631534609446, + "loss": 1.2683870792388916, + "loss_ce": 0.004226885735988617, + "loss_iou": 0.5625, + "loss_num": 0.0277099609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 252755568, + "step": 3816 + }, + { + "epoch": 0.3572799176299902, + "grad_norm": 29.325708389282227, + "learning_rate": 5e-05, + "loss": 1.5757, + "num_input_tokens_seen": 252821712, + "step": 3817 + }, + { + "epoch": 0.3572799176299902, + "loss": 1.375547170639038, + "loss_ce": 0.0049416664987802505, + "loss_iou": 0.5234375, + "loss_num": 0.06396484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 252821712, + "step": 3817 + }, + { + "epoch": 0.3573735199138859, + "grad_norm": 18.662654876708984, + "learning_rate": 5e-05, + "loss": 1.0818, + "num_input_tokens_seen": 252887648, + "step": 3818 + }, + { + "epoch": 0.3573735199138859, + "loss": 1.1648759841918945, + "loss_ce": 0.004231431521475315, + "loss_iou": 0.4765625, + "loss_num": 0.04150390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 252887648, + "step": 3818 + }, + { + "epoch": 0.35746712219778165, + "grad_norm": 31.385486602783203, + "learning_rate": 5e-05, + "loss": 1.3327, + "num_input_tokens_seen": 252953428, + "step": 3819 + }, + { + "epoch": 0.35746712219778165, + "loss": 1.429532766342163, + "loss_ce": 0.0027750488370656967, + "loss_iou": 0.5546875, + "loss_num": 0.0634765625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 252953428, + "step": 3819 + }, + { + "epoch": 0.35756072448167736, + "grad_norm": 21.94999122619629, + "learning_rate": 5e-05, + "loss": 1.437, + "num_input_tokens_seen": 253019704, + "step": 3820 + }, + { + "epoch": 0.35756072448167736, + "loss": 1.5294301509857178, + "loss_ce": 0.006480982061475515, + "loss_iou": 0.671875, + "loss_num": 0.035400390625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 253019704, + "step": 3820 + }, + { + "epoch": 0.35765432676557307, + "grad_norm": 16.596799850463867, + "learning_rate": 5e-05, + "loss": 1.0904, + "num_input_tokens_seen": 253085800, + "step": 3821 + }, + { + "epoch": 0.35765432676557307, + "loss": 1.227621078491211, + "loss_ce": 0.0035000182688236237, + "loss_iou": 0.53125, + "loss_num": 0.0322265625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 253085800, + "step": 3821 + }, + { + "epoch": 0.3577479290494688, + "grad_norm": 23.880023956298828, + "learning_rate": 5e-05, + "loss": 1.5784, + "num_input_tokens_seen": 253152092, + "step": 3822 + }, + { + "epoch": 0.3577479290494688, + "loss": 1.4667890071868896, + "loss_ce": 0.002433606656268239, + "loss_iou": 0.58203125, + "loss_num": 0.06005859375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 253152092, + "step": 3822 + }, + { + "epoch": 0.35784153133336455, + "grad_norm": 26.221240997314453, + "learning_rate": 5e-05, + "loss": 1.2829, + "num_input_tokens_seen": 253217216, + "step": 3823 + }, + { + "epoch": 0.35784153133336455, + "loss": 1.206403136253357, + "loss_ce": 0.005963684059679508, + "loss_iou": 0.482421875, + "loss_num": 0.047119140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 253217216, + "step": 3823 + }, + { + "epoch": 0.35793513361726026, + "grad_norm": 27.62700843811035, + "learning_rate": 5e-05, + "loss": 1.5616, + "num_input_tokens_seen": 253283996, + "step": 3824 + }, + { + "epoch": 0.35793513361726026, + "loss": 1.5621179342269897, + "loss_ce": 0.0045007579028606415, + "loss_iou": 0.64453125, + "loss_num": 0.054443359375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 253283996, + "step": 3824 + }, + { + "epoch": 0.35802873590115597, + "grad_norm": 58.04174041748047, + "learning_rate": 5e-05, + "loss": 1.412, + "num_input_tokens_seen": 253350792, + "step": 3825 + }, + { + "epoch": 0.35802873590115597, + "loss": 1.4483146667480469, + "loss_ce": 0.008373213931918144, + "loss_iou": 0.58984375, + "loss_num": 0.0517578125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 253350792, + "step": 3825 + }, + { + "epoch": 0.35812233818505174, + "grad_norm": 26.386741638183594, + "learning_rate": 5e-05, + "loss": 1.7399, + "num_input_tokens_seen": 253417320, + "step": 3826 + }, + { + "epoch": 0.35812233818505174, + "loss": 1.763947606086731, + "loss_ce": 0.007111691869795322, + "loss_iou": 0.703125, + "loss_num": 0.0693359375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 253417320, + "step": 3826 + }, + { + "epoch": 0.35821594046894745, + "grad_norm": 22.723888397216797, + "learning_rate": 5e-05, + "loss": 1.1559, + "num_input_tokens_seen": 253482760, + "step": 3827 + }, + { + "epoch": 0.35821594046894745, + "loss": 0.9857834577560425, + "loss_ce": 0.005680882837623358, + "loss_iou": 0.38671875, + "loss_num": 0.041259765625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 253482760, + "step": 3827 + }, + { + "epoch": 0.35830954275284316, + "grad_norm": 105.30680847167969, + "learning_rate": 5e-05, + "loss": 1.3322, + "num_input_tokens_seen": 253549452, + "step": 3828 + }, + { + "epoch": 0.35830954275284316, + "loss": 1.2688713073730469, + "loss_ce": 0.006847410928457975, + "loss_iou": 0.490234375, + "loss_num": 0.056884765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 253549452, + "step": 3828 + }, + { + "epoch": 0.3584031450367389, + "grad_norm": 29.074954986572266, + "learning_rate": 5e-05, + "loss": 1.3919, + "num_input_tokens_seen": 253613672, + "step": 3829 + }, + { + "epoch": 0.3584031450367389, + "loss": 1.4572618007659912, + "loss_ce": 0.004136784002184868, + "loss_iou": 0.58984375, + "loss_num": 0.054443359375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 253613672, + "step": 3829 + }, + { + "epoch": 0.35849674732063463, + "grad_norm": 30.948087692260742, + "learning_rate": 5e-05, + "loss": 1.3421, + "num_input_tokens_seen": 253679756, + "step": 3830 + }, + { + "epoch": 0.35849674732063463, + "loss": 1.2343542575836182, + "loss_ce": 0.004129580222070217, + "loss_iou": 0.46484375, + "loss_num": 0.06005859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 253679756, + "step": 3830 + }, + { + "epoch": 0.35859034960453035, + "grad_norm": 25.996593475341797, + "learning_rate": 5e-05, + "loss": 1.4863, + "num_input_tokens_seen": 253746732, + "step": 3831 + }, + { + "epoch": 0.35859034960453035, + "loss": 1.2896075248718262, + "loss_ce": 0.0029864534735679626, + "loss_iou": 0.56640625, + "loss_num": 0.031494140625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 253746732, + "step": 3831 + }, + { + "epoch": 0.35868395188842606, + "grad_norm": 50.55592346191406, + "learning_rate": 5e-05, + "loss": 1.2749, + "num_input_tokens_seen": 253812632, + "step": 3832 + }, + { + "epoch": 0.35868395188842606, + "loss": 1.3609986305236816, + "loss_ce": 0.009436029940843582, + "loss_iou": 0.5625, + "loss_num": 0.044677734375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 253812632, + "step": 3832 + }, + { + "epoch": 0.3587775541723218, + "grad_norm": 22.35114097595215, + "learning_rate": 5e-05, + "loss": 1.3814, + "num_input_tokens_seen": 253879212, + "step": 3833 + }, + { + "epoch": 0.3587775541723218, + "loss": 1.462552547454834, + "loss_ce": 0.004300536587834358, + "loss_iou": 0.5703125, + "loss_num": 0.064453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 253879212, + "step": 3833 + }, + { + "epoch": 0.35887115645621753, + "grad_norm": 26.133445739746094, + "learning_rate": 5e-05, + "loss": 1.6415, + "num_input_tokens_seen": 253945172, + "step": 3834 + }, + { + "epoch": 0.35887115645621753, + "loss": 1.7571624517440796, + "loss_ce": 0.007162506692111492, + "loss_iou": 0.6796875, + "loss_num": 0.078125, + "loss_xval": 1.75, + "num_input_tokens_seen": 253945172, + "step": 3834 + }, + { + "epoch": 0.35896475874011324, + "grad_norm": 27.23897361755371, + "learning_rate": 5e-05, + "loss": 1.3649, + "num_input_tokens_seen": 254011844, + "step": 3835 + }, + { + "epoch": 0.35896475874011324, + "loss": 1.5204861164093018, + "loss_ce": 0.006814256310462952, + "loss_iou": 0.6171875, + "loss_num": 0.056396484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 254011844, + "step": 3835 + }, + { + "epoch": 0.359058361024009, + "grad_norm": 34.14655303955078, + "learning_rate": 5e-05, + "loss": 1.3203, + "num_input_tokens_seen": 254077760, + "step": 3836 + }, + { + "epoch": 0.359058361024009, + "loss": 1.3540546894073486, + "loss_ce": 0.005421890877187252, + "loss_iou": 0.5625, + "loss_num": 0.045654296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 254077760, + "step": 3836 + }, + { + "epoch": 0.3591519633079047, + "grad_norm": 23.973108291625977, + "learning_rate": 5e-05, + "loss": 1.6667, + "num_input_tokens_seen": 254144368, + "step": 3837 + }, + { + "epoch": 0.3591519633079047, + "loss": 1.9346356391906738, + "loss_ce": 0.0029950684402137995, + "loss_iou": 0.7578125, + "loss_num": 0.0830078125, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 254144368, + "step": 3837 + }, + { + "epoch": 0.35924556559180043, + "grad_norm": 14.81295108795166, + "learning_rate": 5e-05, + "loss": 1.5248, + "num_input_tokens_seen": 254210724, + "step": 3838 + }, + { + "epoch": 0.35924556559180043, + "loss": 1.3920135498046875, + "loss_ce": 0.003829952096566558, + "loss_iou": 0.5234375, + "loss_num": 0.068359375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 254210724, + "step": 3838 + }, + { + "epoch": 0.35933916787569614, + "grad_norm": 228.7640838623047, + "learning_rate": 5e-05, + "loss": 1.4907, + "num_input_tokens_seen": 254277704, + "step": 3839 + }, + { + "epoch": 0.35933916787569614, + "loss": 1.335458755493164, + "loss_ce": 0.01221649069339037, + "loss_iou": 0.5078125, + "loss_num": 0.061279296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 254277704, + "step": 3839 + }, + { + "epoch": 0.3594327701595919, + "grad_norm": 30.683616638183594, + "learning_rate": 5e-05, + "loss": 1.3883, + "num_input_tokens_seen": 254343496, + "step": 3840 + }, + { + "epoch": 0.3594327701595919, + "loss": 1.5232099294662476, + "loss_ce": 0.005143512040376663, + "loss_iou": 0.58984375, + "loss_num": 0.06689453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 254343496, + "step": 3840 + }, + { + "epoch": 0.3595263724434876, + "grad_norm": 62.56199264526367, + "learning_rate": 5e-05, + "loss": 1.6315, + "num_input_tokens_seen": 254410352, + "step": 3841 + }, + { + "epoch": 0.3595263724434876, + "loss": 1.6729618310928345, + "loss_ce": 0.004016554448753595, + "loss_iou": 0.6796875, + "loss_num": 0.061279296875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 254410352, + "step": 3841 + }, + { + "epoch": 0.35961997472738333, + "grad_norm": 34.9542350769043, + "learning_rate": 5e-05, + "loss": 1.5272, + "num_input_tokens_seen": 254476636, + "step": 3842 + }, + { + "epoch": 0.35961997472738333, + "loss": 1.5351890325546265, + "loss_ce": 0.007845314219594002, + "loss_iou": 0.625, + "loss_num": 0.054931640625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 254476636, + "step": 3842 + }, + { + "epoch": 0.3597135770112791, + "grad_norm": 30.81777000427246, + "learning_rate": 5e-05, + "loss": 1.437, + "num_input_tokens_seen": 254541704, + "step": 3843 + }, + { + "epoch": 0.3597135770112791, + "loss": 1.4094040393829346, + "loss_ce": 0.012065219692885876, + "loss_iou": 0.54296875, + "loss_num": 0.0625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 254541704, + "step": 3843 + }, + { + "epoch": 0.3598071792951748, + "grad_norm": 33.980743408203125, + "learning_rate": 5e-05, + "loss": 1.5379, + "num_input_tokens_seen": 254607920, + "step": 3844 + }, + { + "epoch": 0.3598071792951748, + "loss": 1.6039977073669434, + "loss_ce": 0.0073180063627660275, + "loss_iou": 0.671875, + "loss_num": 0.050048828125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 254607920, + "step": 3844 + }, + { + "epoch": 0.3599007815790705, + "grad_norm": 25.925189971923828, + "learning_rate": 5e-05, + "loss": 1.3263, + "num_input_tokens_seen": 254674336, + "step": 3845 + }, + { + "epoch": 0.3599007815790705, + "loss": 1.3964755535125732, + "loss_ce": 0.007803751155734062, + "loss_iou": 0.5625, + "loss_num": 0.05322265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 254674336, + "step": 3845 + }, + { + "epoch": 0.3599943838629663, + "grad_norm": 41.20928955078125, + "learning_rate": 5e-05, + "loss": 1.6059, + "num_input_tokens_seen": 254741228, + "step": 3846 + }, + { + "epoch": 0.3599943838629663, + "loss": 1.5326600074768066, + "loss_ce": 0.008245894685387611, + "loss_iou": 0.6328125, + "loss_num": 0.0517578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 254741228, + "step": 3846 + }, + { + "epoch": 0.360087986146862, + "grad_norm": 27.70813751220703, + "learning_rate": 5e-05, + "loss": 1.3411, + "num_input_tokens_seen": 254807580, + "step": 3847 + }, + { + "epoch": 0.360087986146862, + "loss": 1.4202702045440674, + "loss_ce": 0.0042546410113573074, + "loss_iou": 0.59765625, + "loss_num": 0.0439453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 254807580, + "step": 3847 + }, + { + "epoch": 0.3601815884307577, + "grad_norm": 31.373220443725586, + "learning_rate": 5e-05, + "loss": 1.1782, + "num_input_tokens_seen": 254873264, + "step": 3848 + }, + { + "epoch": 0.3601815884307577, + "loss": 1.2204126119613647, + "loss_ce": 0.008986882865428925, + "loss_iou": 0.48828125, + "loss_num": 0.04736328125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 254873264, + "step": 3848 + }, + { + "epoch": 0.3602751907146534, + "grad_norm": 29.523340225219727, + "learning_rate": 5e-05, + "loss": 1.1972, + "num_input_tokens_seen": 254939124, + "step": 3849 + }, + { + "epoch": 0.3602751907146534, + "loss": 1.0339815616607666, + "loss_ce": 0.003952270373702049, + "loss_iou": 0.43359375, + "loss_num": 0.0322265625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 254939124, + "step": 3849 + }, + { + "epoch": 0.3603687929985492, + "grad_norm": 35.41106414794922, + "learning_rate": 5e-05, + "loss": 1.3877, + "num_input_tokens_seen": 255004984, + "step": 3850 + }, + { + "epoch": 0.3603687929985492, + "loss": 1.5489107370376587, + "loss_ce": 0.010824768804013729, + "loss_iou": 0.6015625, + "loss_num": 0.06640625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 255004984, + "step": 3850 + }, + { + "epoch": 0.3604623952824449, + "grad_norm": 32.25589370727539, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 255072140, + "step": 3851 + }, + { + "epoch": 0.3604623952824449, + "loss": 1.139512538909912, + "loss_ce": 0.007676601409912109, + "loss_iou": 0.46484375, + "loss_num": 0.040283203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 255072140, + "step": 3851 + }, + { + "epoch": 0.3605559975663406, + "grad_norm": 40.44350051879883, + "learning_rate": 5e-05, + "loss": 1.3483, + "num_input_tokens_seen": 255137500, + "step": 3852 + }, + { + "epoch": 0.3605559975663406, + "loss": 1.35080885887146, + "loss_ce": 0.003640816081315279, + "loss_iou": 0.57421875, + "loss_num": 0.03955078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 255137500, + "step": 3852 + }, + { + "epoch": 0.36064959985023637, + "grad_norm": 26.932327270507812, + "learning_rate": 5e-05, + "loss": 1.4373, + "num_input_tokens_seen": 255204320, + "step": 3853 + }, + { + "epoch": 0.36064959985023637, + "loss": 1.6209089756011963, + "loss_ce": 0.004698014352470636, + "loss_iou": 0.640625, + "loss_num": 0.06689453125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 255204320, + "step": 3853 + }, + { + "epoch": 0.3607432021341321, + "grad_norm": 23.202775955200195, + "learning_rate": 5e-05, + "loss": 1.3226, + "num_input_tokens_seen": 255270512, + "step": 3854 + }, + { + "epoch": 0.3607432021341321, + "loss": 1.0814577341079712, + "loss_ce": 0.004553454462438822, + "loss_iou": 0.4140625, + "loss_num": 0.04931640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 255270512, + "step": 3854 + }, + { + "epoch": 0.3608368044180278, + "grad_norm": 41.50984573364258, + "learning_rate": 5e-05, + "loss": 1.2359, + "num_input_tokens_seen": 255337016, + "step": 3855 + }, + { + "epoch": 0.3608368044180278, + "loss": 1.0581815242767334, + "loss_ce": 0.0034940862096846104, + "loss_iou": 0.44921875, + "loss_num": 0.0308837890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 255337016, + "step": 3855 + }, + { + "epoch": 0.3609304067019235, + "grad_norm": 24.14653778076172, + "learning_rate": 5e-05, + "loss": 1.6067, + "num_input_tokens_seen": 255403040, + "step": 3856 + }, + { + "epoch": 0.3609304067019235, + "loss": 1.564652681350708, + "loss_ce": 0.004594052210450172, + "loss_iou": 0.6328125, + "loss_num": 0.058349609375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 255403040, + "step": 3856 + }, + { + "epoch": 0.36102400898581927, + "grad_norm": 26.012975692749023, + "learning_rate": 5e-05, + "loss": 1.2269, + "num_input_tokens_seen": 255468232, + "step": 3857 + }, + { + "epoch": 0.36102400898581927, + "loss": 1.012009620666504, + "loss_ce": 0.006028190720826387, + "loss_iou": 0.435546875, + "loss_num": 0.02734375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 255468232, + "step": 3857 + }, + { + "epoch": 0.361117611269715, + "grad_norm": 25.54911994934082, + "learning_rate": 5e-05, + "loss": 1.4046, + "num_input_tokens_seen": 255533860, + "step": 3858 + }, + { + "epoch": 0.361117611269715, + "loss": 1.2875566482543945, + "loss_ce": 0.008748022839426994, + "loss_iou": 0.51171875, + "loss_num": 0.051513671875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 255533860, + "step": 3858 + }, + { + "epoch": 0.3612112135536107, + "grad_norm": 29.165939331054688, + "learning_rate": 5e-05, + "loss": 1.2991, + "num_input_tokens_seen": 255599948, + "step": 3859 + }, + { + "epoch": 0.3612112135536107, + "loss": 1.2389352321624756, + "loss_ce": 0.014814192429184914, + "loss_iou": 0.50390625, + "loss_num": 0.042724609375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 255599948, + "step": 3859 + }, + { + "epoch": 0.36130481583750645, + "grad_norm": 31.390108108520508, + "learning_rate": 5e-05, + "loss": 1.2778, + "num_input_tokens_seen": 255667236, + "step": 3860 + }, + { + "epoch": 0.36130481583750645, + "loss": 1.1914269924163818, + "loss_ce": 0.0039269146509468555, + "loss_iou": 0.53515625, + "loss_num": 0.023193359375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 255667236, + "step": 3860 + }, + { + "epoch": 0.36139841812140217, + "grad_norm": 23.40657615661621, + "learning_rate": 5e-05, + "loss": 1.3771, + "num_input_tokens_seen": 255734296, + "step": 3861 + }, + { + "epoch": 0.36139841812140217, + "loss": 1.4665780067443848, + "loss_ce": 0.009058399125933647, + "loss_iou": 0.578125, + "loss_num": 0.06103515625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 255734296, + "step": 3861 + }, + { + "epoch": 0.3614920204052979, + "grad_norm": 12.270401954650879, + "learning_rate": 5e-05, + "loss": 1.1437, + "num_input_tokens_seen": 255800104, + "step": 3862 + }, + { + "epoch": 0.3614920204052979, + "loss": 1.3889422416687012, + "loss_ce": 0.006617933511734009, + "loss_iou": 0.54296875, + "loss_num": 0.0595703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 255800104, + "step": 3862 + }, + { + "epoch": 0.36158562268919364, + "grad_norm": 16.934629440307617, + "learning_rate": 5e-05, + "loss": 1.1744, + "num_input_tokens_seen": 255866968, + "step": 3863 + }, + { + "epoch": 0.36158562268919364, + "loss": 1.3468637466430664, + "loss_ce": 0.005066880490630865, + "loss_iou": 0.53125, + "loss_num": 0.05517578125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 255866968, + "step": 3863 + }, + { + "epoch": 0.36167922497308935, + "grad_norm": 25.67149543762207, + "learning_rate": 5e-05, + "loss": 1.36, + "num_input_tokens_seen": 255932840, + "step": 3864 + }, + { + "epoch": 0.36167922497308935, + "loss": 1.3157559633255005, + "loss_ce": 0.009115353226661682, + "loss_iou": 0.51171875, + "loss_num": 0.055908203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 255932840, + "step": 3864 + }, + { + "epoch": 0.36177282725698506, + "grad_norm": 20.666156768798828, + "learning_rate": 5e-05, + "loss": 1.1782, + "num_input_tokens_seen": 255999628, + "step": 3865 + }, + { + "epoch": 0.36177282725698506, + "loss": 1.127396583557129, + "loss_ce": 0.008744290098547935, + "loss_iou": 0.46875, + "loss_num": 0.035888671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 255999628, + "step": 3865 + }, + { + "epoch": 0.3618664295408808, + "grad_norm": 16.260723114013672, + "learning_rate": 5e-05, + "loss": 1.3397, + "num_input_tokens_seen": 256066584, + "step": 3866 + }, + { + "epoch": 0.3618664295408808, + "loss": 1.4446632862091064, + "loss_ce": 0.007651643827557564, + "loss_iou": 0.5703125, + "loss_num": 0.059814453125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 256066584, + "step": 3866 + }, + { + "epoch": 0.36196003182477654, + "grad_norm": 18.712108612060547, + "learning_rate": 5e-05, + "loss": 1.3027, + "num_input_tokens_seen": 256132528, + "step": 3867 + }, + { + "epoch": 0.36196003182477654, + "loss": 1.4516030550003052, + "loss_ce": 0.003849088679999113, + "loss_iou": 0.58984375, + "loss_num": 0.05322265625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 256132528, + "step": 3867 + }, + { + "epoch": 0.36205363410867225, + "grad_norm": 25.798871994018555, + "learning_rate": 5e-05, + "loss": 1.2465, + "num_input_tokens_seen": 256199148, + "step": 3868 + }, + { + "epoch": 0.36205363410867225, + "loss": 1.1591289043426514, + "loss_ce": 0.006785160396248102, + "loss_iou": 0.5, + "loss_num": 0.0296630859375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 256199148, + "step": 3868 + }, + { + "epoch": 0.36214723639256796, + "grad_norm": 57.71001434326172, + "learning_rate": 5e-05, + "loss": 1.5598, + "num_input_tokens_seen": 256265288, + "step": 3869 + }, + { + "epoch": 0.36214723639256796, + "loss": 1.2412936687469482, + "loss_ce": 0.006430388428270817, + "loss_iou": 0.5390625, + "loss_num": 0.03125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 256265288, + "step": 3869 + }, + { + "epoch": 0.36224083867646373, + "grad_norm": 21.811450958251953, + "learning_rate": 5e-05, + "loss": 1.4648, + "num_input_tokens_seen": 256332536, + "step": 3870 + }, + { + "epoch": 0.36224083867646373, + "loss": 1.3369035720825195, + "loss_ce": 0.011708246544003487, + "loss_iou": 0.53515625, + "loss_num": 0.05078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 256332536, + "step": 3870 + }, + { + "epoch": 0.36233444096035944, + "grad_norm": 25.570297241210938, + "learning_rate": 5e-05, + "loss": 1.3582, + "num_input_tokens_seen": 256398532, + "step": 3871 + }, + { + "epoch": 0.36233444096035944, + "loss": 1.2315714359283447, + "loss_ce": 0.01013591792434454, + "loss_iou": 0.490234375, + "loss_num": 0.04833984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 256398532, + "step": 3871 + }, + { + "epoch": 0.36242804324425515, + "grad_norm": 49.26054382324219, + "learning_rate": 5e-05, + "loss": 1.2921, + "num_input_tokens_seen": 256465032, + "step": 3872 + }, + { + "epoch": 0.36242804324425515, + "loss": 1.2738597393035889, + "loss_ce": 0.0033519864082336426, + "loss_iou": 0.53515625, + "loss_num": 0.04052734375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 256465032, + "step": 3872 + }, + { + "epoch": 0.36252164552815086, + "grad_norm": 18.672164916992188, + "learning_rate": 5e-05, + "loss": 1.6078, + "num_input_tokens_seen": 256531388, + "step": 3873 + }, + { + "epoch": 0.36252164552815086, + "loss": 1.5311250686645508, + "loss_ce": 0.005734493024647236, + "loss_iou": 0.62890625, + "loss_num": 0.05322265625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 256531388, + "step": 3873 + }, + { + "epoch": 0.3626152478120466, + "grad_norm": 22.86305809020996, + "learning_rate": 5e-05, + "loss": 1.1378, + "num_input_tokens_seen": 256597248, + "step": 3874 + }, + { + "epoch": 0.3626152478120466, + "loss": 1.235906720161438, + "loss_ce": 0.00641454104334116, + "loss_iou": 0.50390625, + "loss_num": 0.044921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 256597248, + "step": 3874 + }, + { + "epoch": 0.36270885009594234, + "grad_norm": 26.60591697692871, + "learning_rate": 5e-05, + "loss": 1.2531, + "num_input_tokens_seen": 256663520, + "step": 3875 + }, + { + "epoch": 0.36270885009594234, + "loss": 1.0908284187316895, + "loss_ce": 0.0034260787069797516, + "loss_iou": 0.42578125, + "loss_num": 0.04736328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 256663520, + "step": 3875 + }, + { + "epoch": 0.36280245237983805, + "grad_norm": 24.526897430419922, + "learning_rate": 5e-05, + "loss": 1.243, + "num_input_tokens_seen": 256731112, + "step": 3876 + }, + { + "epoch": 0.36280245237983805, + "loss": 1.449812650680542, + "loss_ce": 0.005476716905832291, + "loss_iou": 0.60546875, + "loss_num": 0.04638671875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 256731112, + "step": 3876 + }, + { + "epoch": 0.3628960546637338, + "grad_norm": 36.438655853271484, + "learning_rate": 5e-05, + "loss": 1.4344, + "num_input_tokens_seen": 256796960, + "step": 3877 + }, + { + "epoch": 0.3628960546637338, + "loss": 1.41978919506073, + "loss_ce": 0.005238380283117294, + "loss_iou": 0.55859375, + "loss_num": 0.059326171875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 256796960, + "step": 3877 + }, + { + "epoch": 0.3629896569476295, + "grad_norm": 22.20621109008789, + "learning_rate": 5e-05, + "loss": 1.7951, + "num_input_tokens_seen": 256861344, + "step": 3878 + }, + { + "epoch": 0.3629896569476295, + "loss": 1.8474003076553345, + "loss_ce": 0.009021367877721786, + "loss_iou": 0.7265625, + "loss_num": 0.07666015625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 256861344, + "step": 3878 + }, + { + "epoch": 0.36308325923152523, + "grad_norm": 18.223386764526367, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 256928064, + "step": 3879 + }, + { + "epoch": 0.36308325923152523, + "loss": 1.348849892616272, + "loss_ce": 0.004123359452933073, + "loss_iou": 0.546875, + "loss_num": 0.050048828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 256928064, + "step": 3879 + }, + { + "epoch": 0.363176861515421, + "grad_norm": 24.631162643432617, + "learning_rate": 5e-05, + "loss": 1.3253, + "num_input_tokens_seen": 256993304, + "step": 3880 + }, + { + "epoch": 0.363176861515421, + "loss": 1.3371837139129639, + "loss_ce": 0.007105658762156963, + "loss_iou": 0.546875, + "loss_num": 0.0478515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 256993304, + "step": 3880 + }, + { + "epoch": 0.3632704637993167, + "grad_norm": 156.11648559570312, + "learning_rate": 5e-05, + "loss": 1.6287, + "num_input_tokens_seen": 257059136, + "step": 3881 + }, + { + "epoch": 0.3632704637993167, + "loss": 1.4368083477020264, + "loss_ce": 0.004191184416413307, + "loss_iou": 0.61328125, + "loss_num": 0.041015625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 257059136, + "step": 3881 + }, + { + "epoch": 0.3633640660832124, + "grad_norm": 33.807151794433594, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 257126408, + "step": 3882 + }, + { + "epoch": 0.3633640660832124, + "loss": 1.403756856918335, + "loss_ce": 0.003366295015439391, + "loss_iou": 0.62109375, + "loss_num": 0.031982421875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 257126408, + "step": 3882 + }, + { + "epoch": 0.36345766836710813, + "grad_norm": 29.032533645629883, + "learning_rate": 5e-05, + "loss": 1.188, + "num_input_tokens_seen": 257193128, + "step": 3883 + }, + { + "epoch": 0.36345766836710813, + "loss": 1.0745892524719238, + "loss_ce": 0.007694760337471962, + "loss_iou": 0.421875, + "loss_num": 0.044677734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 257193128, + "step": 3883 + }, + { + "epoch": 0.3635512706510039, + "grad_norm": 30.084447860717773, + "learning_rate": 5e-05, + "loss": 1.3535, + "num_input_tokens_seen": 257260136, + "step": 3884 + }, + { + "epoch": 0.3635512706510039, + "loss": 1.5438709259033203, + "loss_ce": 0.00773820374161005, + "loss_iou": 0.609375, + "loss_num": 0.0634765625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 257260136, + "step": 3884 + }, + { + "epoch": 0.3636448729348996, + "grad_norm": 28.8676700592041, + "learning_rate": 5e-05, + "loss": 1.4723, + "num_input_tokens_seen": 257326596, + "step": 3885 + }, + { + "epoch": 0.3636448729348996, + "loss": 1.5852420330047607, + "loss_ce": 0.007117072120308876, + "loss_iou": 0.63671875, + "loss_num": 0.061279296875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 257326596, + "step": 3885 + }, + { + "epoch": 0.3637384752187953, + "grad_norm": 214.13739013671875, + "learning_rate": 5e-05, + "loss": 1.3138, + "num_input_tokens_seen": 257392704, + "step": 3886 + }, + { + "epoch": 0.3637384752187953, + "loss": 1.3630067110061646, + "loss_ce": 0.007049663923680782, + "loss_iou": 0.5, + "loss_num": 0.0712890625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 257392704, + "step": 3886 + }, + { + "epoch": 0.3638320775026911, + "grad_norm": 32.66344451904297, + "learning_rate": 5e-05, + "loss": 1.4655, + "num_input_tokens_seen": 257459588, + "step": 3887 + }, + { + "epoch": 0.3638320775026911, + "loss": 1.50942063331604, + "loss_ce": 0.007467404939234257, + "loss_iou": 0.60546875, + "loss_num": 0.0576171875, + "loss_xval": 1.5, + "num_input_tokens_seen": 257459588, + "step": 3887 + }, + { + "epoch": 0.3639256797865868, + "grad_norm": 28.34696388244629, + "learning_rate": 5e-05, + "loss": 1.5062, + "num_input_tokens_seen": 257525372, + "step": 3888 + }, + { + "epoch": 0.3639256797865868, + "loss": 1.5711674690246582, + "loss_ce": 0.005737648345530033, + "loss_iou": 0.60546875, + "loss_num": 0.0712890625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 257525372, + "step": 3888 + }, + { + "epoch": 0.3640192820704825, + "grad_norm": 13.845455169677734, + "learning_rate": 5e-05, + "loss": 1.1894, + "num_input_tokens_seen": 257590204, + "step": 3889 + }, + { + "epoch": 0.3640192820704825, + "loss": 0.9258078336715698, + "loss_ce": 0.0051535069942474365, + "loss_iou": 0.294921875, + "loss_num": 0.06640625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 257590204, + "step": 3889 + }, + { + "epoch": 0.3641128843543783, + "grad_norm": 32.85861587524414, + "learning_rate": 5e-05, + "loss": 1.1474, + "num_input_tokens_seen": 257655680, + "step": 3890 + }, + { + "epoch": 0.3641128843543783, + "loss": 1.2459192276000977, + "loss_ce": 0.008156735450029373, + "loss_iou": 0.490234375, + "loss_num": 0.05126953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 257655680, + "step": 3890 + }, + { + "epoch": 0.364206486638274, + "grad_norm": 40.10943603515625, + "learning_rate": 5e-05, + "loss": 1.4291, + "num_input_tokens_seen": 257722332, + "step": 3891 + }, + { + "epoch": 0.364206486638274, + "loss": 1.4412177801132202, + "loss_ce": 0.01153024472296238, + "loss_iou": 0.56640625, + "loss_num": 0.059326171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 257722332, + "step": 3891 + }, + { + "epoch": 0.3643000889221697, + "grad_norm": 35.25660705566406, + "learning_rate": 5e-05, + "loss": 1.5353, + "num_input_tokens_seen": 257789960, + "step": 3892 + }, + { + "epoch": 0.3643000889221697, + "loss": 1.5436816215515137, + "loss_ce": 0.0026659530121833086, + "loss_iou": 0.62890625, + "loss_num": 0.05712890625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 257789960, + "step": 3892 + }, + { + "epoch": 0.3643936912060654, + "grad_norm": 24.885868072509766, + "learning_rate": 5e-05, + "loss": 1.2971, + "num_input_tokens_seen": 257856628, + "step": 3893 + }, + { + "epoch": 0.3643936912060654, + "loss": 1.2822798490524292, + "loss_ce": 0.006706084590405226, + "loss_iou": 0.53125, + "loss_num": 0.04296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 257856628, + "step": 3893 + }, + { + "epoch": 0.3644872934899612, + "grad_norm": 23.392274856567383, + "learning_rate": 5e-05, + "loss": 1.2484, + "num_input_tokens_seen": 257922620, + "step": 3894 + }, + { + "epoch": 0.3644872934899612, + "loss": 1.484807014465332, + "loss_ce": 0.005803174804896116, + "loss_iou": 0.578125, + "loss_num": 0.064453125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 257922620, + "step": 3894 + }, + { + "epoch": 0.3645808957738569, + "grad_norm": 35.21171188354492, + "learning_rate": 5e-05, + "loss": 1.584, + "num_input_tokens_seen": 257987672, + "step": 3895 + }, + { + "epoch": 0.3645808957738569, + "loss": 1.5609768629074097, + "loss_ce": 0.0038479208014905453, + "loss_iou": 0.609375, + "loss_num": 0.068359375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 257987672, + "step": 3895 + }, + { + "epoch": 0.3646744980577526, + "grad_norm": 41.75724411010742, + "learning_rate": 5e-05, + "loss": 1.3437, + "num_input_tokens_seen": 258053940, + "step": 3896 + }, + { + "epoch": 0.3646744980577526, + "loss": 1.4538421630859375, + "loss_ce": 0.015548745170235634, + "loss_iou": 0.5234375, + "loss_num": 0.078125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 258053940, + "step": 3896 + }, + { + "epoch": 0.36476810034164836, + "grad_norm": 25.527973175048828, + "learning_rate": 5e-05, + "loss": 1.2668, + "num_input_tokens_seen": 258120848, + "step": 3897 + }, + { + "epoch": 0.36476810034164836, + "loss": 1.237147569656372, + "loss_ce": 0.003260722616687417, + "loss_iou": 0.5, + "loss_num": 0.046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 258120848, + "step": 3897 + }, + { + "epoch": 0.36486170262554407, + "grad_norm": 29.957181930541992, + "learning_rate": 5e-05, + "loss": 1.4001, + "num_input_tokens_seen": 258186440, + "step": 3898 + }, + { + "epoch": 0.36486170262554407, + "loss": 1.521573781967163, + "loss_ce": 0.0039955200627446175, + "loss_iou": 0.61328125, + "loss_num": 0.0576171875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 258186440, + "step": 3898 + }, + { + "epoch": 0.3649553049094398, + "grad_norm": 17.0150146484375, + "learning_rate": 5e-05, + "loss": 1.2839, + "num_input_tokens_seen": 258253036, + "step": 3899 + }, + { + "epoch": 0.3649553049094398, + "loss": 1.1094952821731567, + "loss_ce": 0.005003111902624369, + "loss_iou": 0.423828125, + "loss_num": 0.051025390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 258253036, + "step": 3899 + }, + { + "epoch": 0.3650489071933355, + "grad_norm": 28.997066497802734, + "learning_rate": 5e-05, + "loss": 1.4532, + "num_input_tokens_seen": 258318316, + "step": 3900 + }, + { + "epoch": 0.3650489071933355, + "loss": 1.4568918943405151, + "loss_ce": 0.008649641647934914, + "loss_iou": 0.55078125, + "loss_num": 0.0693359375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 258318316, + "step": 3900 + }, + { + "epoch": 0.36514250947723126, + "grad_norm": 33.862464904785156, + "learning_rate": 5e-05, + "loss": 1.3942, + "num_input_tokens_seen": 258384308, + "step": 3901 + }, + { + "epoch": 0.36514250947723126, + "loss": 1.2920153141021729, + "loss_ce": 0.0034410918597131968, + "loss_iou": 0.498046875, + "loss_num": 0.058349609375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 258384308, + "step": 3901 + }, + { + "epoch": 0.36523611176112697, + "grad_norm": 16.475595474243164, + "learning_rate": 5e-05, + "loss": 1.326, + "num_input_tokens_seen": 258449096, + "step": 3902 + }, + { + "epoch": 0.36523611176112697, + "loss": 1.1843986511230469, + "loss_ce": 0.0037345124874264, + "loss_iou": 0.421875, + "loss_num": 0.0673828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 258449096, + "step": 3902 + }, + { + "epoch": 0.3653297140450227, + "grad_norm": 10.042855262756348, + "learning_rate": 5e-05, + "loss": 1.1307, + "num_input_tokens_seen": 258513448, + "step": 3903 + }, + { + "epoch": 0.3653297140450227, + "loss": 1.134523868560791, + "loss_ce": 0.007326656952500343, + "loss_iou": 0.458984375, + "loss_num": 0.0419921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 258513448, + "step": 3903 + }, + { + "epoch": 0.36542331632891845, + "grad_norm": 29.135862350463867, + "learning_rate": 5e-05, + "loss": 1.2811, + "num_input_tokens_seen": 258580004, + "step": 3904 + }, + { + "epoch": 0.36542331632891845, + "loss": 1.1003613471984863, + "loss_ce": 0.005207505542784929, + "loss_iou": 0.443359375, + "loss_num": 0.0419921875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 258580004, + "step": 3904 + }, + { + "epoch": 0.36551691861281416, + "grad_norm": 21.72974395751953, + "learning_rate": 5e-05, + "loss": 1.6188, + "num_input_tokens_seen": 258644024, + "step": 3905 + }, + { + "epoch": 0.36551691861281416, + "loss": 1.5372267961502075, + "loss_ce": 0.00792991928756237, + "loss_iou": 0.65234375, + "loss_num": 0.044189453125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 258644024, + "step": 3905 + }, + { + "epoch": 0.36561052089670987, + "grad_norm": 18.686946868896484, + "learning_rate": 5e-05, + "loss": 1.446, + "num_input_tokens_seen": 258709872, + "step": 3906 + }, + { + "epoch": 0.36561052089670987, + "loss": 1.4138548374176025, + "loss_ce": 0.0032103601843118668, + "loss_iou": 0.5703125, + "loss_num": 0.05322265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 258709872, + "step": 3906 + }, + { + "epoch": 0.36570412318060563, + "grad_norm": 25.584678649902344, + "learning_rate": 5e-05, + "loss": 1.349, + "num_input_tokens_seen": 258776216, + "step": 3907 + }, + { + "epoch": 0.36570412318060563, + "loss": 1.3176180124282837, + "loss_ce": 0.005606233142316341, + "loss_iou": 0.5234375, + "loss_num": 0.052490234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 258776216, + "step": 3907 + }, + { + "epoch": 0.36579772546450134, + "grad_norm": 40.10686492919922, + "learning_rate": 5e-05, + "loss": 1.3434, + "num_input_tokens_seen": 258842412, + "step": 3908 + }, + { + "epoch": 0.36579772546450134, + "loss": 1.4787346124649048, + "loss_ce": 0.0036369068548083305, + "loss_iou": 0.5859375, + "loss_num": 0.059814453125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 258842412, + "step": 3908 + }, + { + "epoch": 0.36589132774839705, + "grad_norm": 24.690710067749023, + "learning_rate": 5e-05, + "loss": 1.4947, + "num_input_tokens_seen": 258909164, + "step": 3909 + }, + { + "epoch": 0.36589132774839705, + "loss": 1.6424697637557983, + "loss_ce": 0.005750999320298433, + "loss_iou": 0.625, + "loss_num": 0.07666015625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 258909164, + "step": 3909 + }, + { + "epoch": 0.36598493003229277, + "grad_norm": 16.426416397094727, + "learning_rate": 5e-05, + "loss": 1.2203, + "num_input_tokens_seen": 258975068, + "step": 3910 + }, + { + "epoch": 0.36598493003229277, + "loss": 1.1377646923065186, + "loss_ce": 0.005806789733469486, + "loss_iou": 0.435546875, + "loss_num": 0.052490234375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 258975068, + "step": 3910 + }, + { + "epoch": 0.36607853231618853, + "grad_norm": 19.121076583862305, + "learning_rate": 5e-05, + "loss": 1.0438, + "num_input_tokens_seen": 259041128, + "step": 3911 + }, + { + "epoch": 0.36607853231618853, + "loss": 1.300186276435852, + "loss_ce": 0.0033112491946667433, + "loss_iou": 0.55078125, + "loss_num": 0.03955078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 259041128, + "step": 3911 + }, + { + "epoch": 0.36617213460008424, + "grad_norm": 19.31698989868164, + "learning_rate": 5e-05, + "loss": 1.4742, + "num_input_tokens_seen": 259107748, + "step": 3912 + }, + { + "epoch": 0.36617213460008424, + "loss": 1.4676434993743896, + "loss_ce": 0.014518586918711662, + "loss_iou": 0.546875, + "loss_num": 0.07177734375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 259107748, + "step": 3912 + }, + { + "epoch": 0.36626573688397995, + "grad_norm": 25.95240592956543, + "learning_rate": 5e-05, + "loss": 1.3572, + "num_input_tokens_seen": 259175184, + "step": 3913 + }, + { + "epoch": 0.36626573688397995, + "loss": 1.4779455661773682, + "loss_ce": 0.0033362722024321556, + "loss_iou": 0.6484375, + "loss_num": 0.035888671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 259175184, + "step": 3913 + }, + { + "epoch": 0.3663593391678757, + "grad_norm": 22.275863647460938, + "learning_rate": 5e-05, + "loss": 1.3261, + "num_input_tokens_seen": 259240856, + "step": 3914 + }, + { + "epoch": 0.3663593391678757, + "loss": 1.2703871726989746, + "loss_ce": 0.008668376132845879, + "loss_iou": 0.498046875, + "loss_num": 0.0537109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 259240856, + "step": 3914 + }, + { + "epoch": 0.36645294145177143, + "grad_norm": 28.913101196289062, + "learning_rate": 5e-05, + "loss": 1.2511, + "num_input_tokens_seen": 259307000, + "step": 3915 + }, + { + "epoch": 0.36645294145177143, + "loss": 1.1929950714111328, + "loss_ce": 0.009401226416230202, + "loss_iou": 0.49609375, + "loss_num": 0.038330078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 259307000, + "step": 3915 + }, + { + "epoch": 0.36654654373566714, + "grad_norm": 22.642305374145508, + "learning_rate": 5e-05, + "loss": 1.4163, + "num_input_tokens_seen": 259374700, + "step": 3916 + }, + { + "epoch": 0.36654654373566714, + "loss": 1.435065507888794, + "loss_ce": 0.006354633718729019, + "loss_iou": 0.57421875, + "loss_num": 0.05615234375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 259374700, + "step": 3916 + }, + { + "epoch": 0.36664014601956285, + "grad_norm": 30.0216064453125, + "learning_rate": 5e-05, + "loss": 1.2547, + "num_input_tokens_seen": 259441668, + "step": 3917 + }, + { + "epoch": 0.36664014601956285, + "loss": 1.1637648344039917, + "loss_ce": 0.004096841439604759, + "loss_iou": 0.486328125, + "loss_num": 0.037109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 259441668, + "step": 3917 + }, + { + "epoch": 0.3667337483034586, + "grad_norm": 25.223445892333984, + "learning_rate": 5e-05, + "loss": 1.4944, + "num_input_tokens_seen": 259507604, + "step": 3918 + }, + { + "epoch": 0.3667337483034586, + "loss": 1.3250019550323486, + "loss_ce": 0.0022480469197034836, + "loss_iou": 0.58984375, + "loss_num": 0.029052734375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 259507604, + "step": 3918 + }, + { + "epoch": 0.36682735058735433, + "grad_norm": 14.87253475189209, + "learning_rate": 5e-05, + "loss": 1.1651, + "num_input_tokens_seen": 259573948, + "step": 3919 + }, + { + "epoch": 0.36682735058735433, + "loss": 1.2377943992614746, + "loss_ce": 0.0034194160252809525, + "loss_iou": 0.498046875, + "loss_num": 0.047607421875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 259573948, + "step": 3919 + }, + { + "epoch": 0.36692095287125004, + "grad_norm": 15.379536628723145, + "learning_rate": 5e-05, + "loss": 1.2461, + "num_input_tokens_seen": 259639676, + "step": 3920 + }, + { + "epoch": 0.36692095287125004, + "loss": 1.3444726467132568, + "loss_ce": 0.0031641186214983463, + "loss_iou": 0.54296875, + "loss_num": 0.05029296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 259639676, + "step": 3920 + }, + { + "epoch": 0.3670145551551458, + "grad_norm": 24.076215744018555, + "learning_rate": 5e-05, + "loss": 1.262, + "num_input_tokens_seen": 259705404, + "step": 3921 + }, + { + "epoch": 0.3670145551551458, + "loss": 1.3675591945648193, + "loss_ce": 0.00427787471562624, + "loss_iou": 0.515625, + "loss_num": 0.06640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 259705404, + "step": 3921 + }, + { + "epoch": 0.3671081574390415, + "grad_norm": 25.591129302978516, + "learning_rate": 5e-05, + "loss": 1.3342, + "num_input_tokens_seen": 259771784, + "step": 3922 + }, + { + "epoch": 0.3671081574390415, + "loss": 1.3885796070098877, + "loss_ce": 0.006743676960468292, + "loss_iou": 0.5625, + "loss_num": 0.05126953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 259771784, + "step": 3922 + }, + { + "epoch": 0.3672017597229372, + "grad_norm": 42.1247444152832, + "learning_rate": 5e-05, + "loss": 1.4726, + "num_input_tokens_seen": 259838688, + "step": 3923 + }, + { + "epoch": 0.3672017597229372, + "loss": 1.4543354511260986, + "loss_ce": 0.0038959342055022717, + "loss_iou": 0.59765625, + "loss_num": 0.051025390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 259838688, + "step": 3923 + }, + { + "epoch": 0.367295362006833, + "grad_norm": 25.162288665771484, + "learning_rate": 5e-05, + "loss": 1.4083, + "num_input_tokens_seen": 259904864, + "step": 3924 + }, + { + "epoch": 0.367295362006833, + "loss": 1.4487223625183105, + "loss_ce": 0.0043863835744559765, + "loss_iou": 0.609375, + "loss_num": 0.046142578125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 259904864, + "step": 3924 + }, + { + "epoch": 0.3673889642907287, + "grad_norm": 46.03004455566406, + "learning_rate": 5e-05, + "loss": 1.1118, + "num_input_tokens_seen": 259971360, + "step": 3925 + }, + { + "epoch": 0.3673889642907287, + "loss": 1.2532507181167603, + "loss_ce": 0.00813352596014738, + "loss_iou": 0.53125, + "loss_num": 0.036376953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 259971360, + "step": 3925 + }, + { + "epoch": 0.3674825665746244, + "grad_norm": 29.814109802246094, + "learning_rate": 5e-05, + "loss": 1.4138, + "num_input_tokens_seen": 260036788, + "step": 3926 + }, + { + "epoch": 0.3674825665746244, + "loss": 1.447306752204895, + "loss_ce": 0.009318475611507893, + "loss_iou": 0.515625, + "loss_num": 0.0810546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 260036788, + "step": 3926 + }, + { + "epoch": 0.3675761688585201, + "grad_norm": 39.045223236083984, + "learning_rate": 5e-05, + "loss": 1.1283, + "num_input_tokens_seen": 260103620, + "step": 3927 + }, + { + "epoch": 0.3675761688585201, + "loss": 1.1129131317138672, + "loss_ce": 0.0030498034320771694, + "loss_iou": 0.48828125, + "loss_num": 0.0267333984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 260103620, + "step": 3927 + }, + { + "epoch": 0.3676697711424159, + "grad_norm": 26.382869720458984, + "learning_rate": 5e-05, + "loss": 1.5512, + "num_input_tokens_seen": 260170292, + "step": 3928 + }, + { + "epoch": 0.3676697711424159, + "loss": 1.6524462699890137, + "loss_ce": 0.0049853757955133915, + "loss_iou": 0.671875, + "loss_num": 0.06103515625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 260170292, + "step": 3928 + }, + { + "epoch": 0.3677633734263116, + "grad_norm": 16.269216537475586, + "learning_rate": 5e-05, + "loss": 1.1346, + "num_input_tokens_seen": 260236180, + "step": 3929 + }, + { + "epoch": 0.3677633734263116, + "loss": 0.9752902984619141, + "loss_ce": 0.004343067295849323, + "loss_iou": 0.359375, + "loss_num": 0.05078125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 260236180, + "step": 3929 + }, + { + "epoch": 0.3678569757102073, + "grad_norm": 32.700626373291016, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 260301688, + "step": 3930 + }, + { + "epoch": 0.3678569757102073, + "loss": 1.1879197359085083, + "loss_ce": 0.002861207351088524, + "loss_iou": 0.466796875, + "loss_num": 0.050048828125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 260301688, + "step": 3930 + }, + { + "epoch": 0.3679505779941031, + "grad_norm": 109.71320343017578, + "learning_rate": 5e-05, + "loss": 1.3784, + "num_input_tokens_seen": 260368680, + "step": 3931 + }, + { + "epoch": 0.3679505779941031, + "loss": 1.4181301593780518, + "loss_ce": 0.006509024649858475, + "loss_iou": 0.578125, + "loss_num": 0.0517578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 260368680, + "step": 3931 + }, + { + "epoch": 0.3680441802779988, + "grad_norm": 25.38262939453125, + "learning_rate": 5e-05, + "loss": 1.3802, + "num_input_tokens_seen": 260435616, + "step": 3932 + }, + { + "epoch": 0.3680441802779988, + "loss": 1.368844747543335, + "loss_ce": 0.006540108472108841, + "loss_iou": 0.55078125, + "loss_num": 0.052978515625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 260435616, + "step": 3932 + }, + { + "epoch": 0.3681377825618945, + "grad_norm": 21.631601333618164, + "learning_rate": 5e-05, + "loss": 1.4052, + "num_input_tokens_seen": 260501660, + "step": 3933 + }, + { + "epoch": 0.3681377825618945, + "loss": 1.1894268989562988, + "loss_ce": 0.005344804376363754, + "loss_iou": 0.48828125, + "loss_num": 0.04150390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 260501660, + "step": 3933 + }, + { + "epoch": 0.3682313848457902, + "grad_norm": 28.03546142578125, + "learning_rate": 5e-05, + "loss": 1.158, + "num_input_tokens_seen": 260568136, + "step": 3934 + }, + { + "epoch": 0.3682313848457902, + "loss": 1.268270492553711, + "loss_ce": 0.006307664327323437, + "loss_iou": 0.5, + "loss_num": 0.05224609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 260568136, + "step": 3934 + }, + { + "epoch": 0.368324987129686, + "grad_norm": 54.29316711425781, + "learning_rate": 5e-05, + "loss": 1.4128, + "num_input_tokens_seen": 260634212, + "step": 3935 + }, + { + "epoch": 0.368324987129686, + "loss": 1.3329976797103882, + "loss_ce": 0.014150056056678295, + "loss_iou": 0.54296875, + "loss_num": 0.047119140625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 260634212, + "step": 3935 + }, + { + "epoch": 0.3684185894135817, + "grad_norm": 35.369625091552734, + "learning_rate": 5e-05, + "loss": 1.4614, + "num_input_tokens_seen": 260700212, + "step": 3936 + }, + { + "epoch": 0.3684185894135817, + "loss": 1.37261962890625, + "loss_ce": 0.013732925057411194, + "loss_iou": 0.57421875, + "loss_num": 0.041748046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 260700212, + "step": 3936 + }, + { + "epoch": 0.3685121916974774, + "grad_norm": 32.84611892700195, + "learning_rate": 5e-05, + "loss": 1.5433, + "num_input_tokens_seen": 260765612, + "step": 3937 + }, + { + "epoch": 0.3685121916974774, + "loss": 1.389090895652771, + "loss_ce": 0.005301805678755045, + "loss_iou": 0.515625, + "loss_num": 0.0712890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 260765612, + "step": 3937 + }, + { + "epoch": 0.36860579398137316, + "grad_norm": 36.469139099121094, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 260832048, + "step": 3938 + }, + { + "epoch": 0.36860579398137316, + "loss": 1.1461762189865112, + "loss_ce": 0.005551266483962536, + "loss_iou": 0.474609375, + "loss_num": 0.037841796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 260832048, + "step": 3938 + }, + { + "epoch": 0.3686993962652689, + "grad_norm": 25.36920738220215, + "learning_rate": 5e-05, + "loss": 1.4403, + "num_input_tokens_seen": 260897948, + "step": 3939 + }, + { + "epoch": 0.3686993962652689, + "loss": 1.6238946914672852, + "loss_ce": 0.0037775335367769003, + "loss_iou": 0.64453125, + "loss_num": 0.06591796875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 260897948, + "step": 3939 + }, + { + "epoch": 0.3687929985491646, + "grad_norm": 18.976686477661133, + "learning_rate": 5e-05, + "loss": 1.1858, + "num_input_tokens_seen": 260963644, + "step": 3940 + }, + { + "epoch": 0.3687929985491646, + "loss": 1.3617618083953857, + "loss_ce": 0.003363432828336954, + "loss_iou": 0.53125, + "loss_num": 0.060302734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 260963644, + "step": 3940 + }, + { + "epoch": 0.36888660083306035, + "grad_norm": 208.07705688476562, + "learning_rate": 5e-05, + "loss": 1.4929, + "num_input_tokens_seen": 261028620, + "step": 3941 + }, + { + "epoch": 0.36888660083306035, + "loss": 1.8821766376495361, + "loss_ce": 0.009129858575761318, + "loss_iou": 0.71484375, + "loss_num": 0.087890625, + "loss_xval": 1.875, + "num_input_tokens_seen": 261028620, + "step": 3941 + }, + { + "epoch": 0.36898020311695606, + "grad_norm": 23.991506576538086, + "learning_rate": 5e-05, + "loss": 1.3835, + "num_input_tokens_seen": 261095508, + "step": 3942 + }, + { + "epoch": 0.36898020311695606, + "loss": 1.3920793533325195, + "loss_ce": 0.006825482007116079, + "loss_iou": 0.52734375, + "loss_num": 0.06591796875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 261095508, + "step": 3942 + }, + { + "epoch": 0.3690738054008518, + "grad_norm": 22.969532012939453, + "learning_rate": 5e-05, + "loss": 1.539, + "num_input_tokens_seen": 261161432, + "step": 3943 + }, + { + "epoch": 0.3690738054008518, + "loss": 1.3845458030700684, + "loss_ce": 0.0041747367940843105, + "loss_iou": 0.58984375, + "loss_num": 0.039306640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 261161432, + "step": 3943 + }, + { + "epoch": 0.3691674076847475, + "grad_norm": 21.72538185119629, + "learning_rate": 5e-05, + "loss": 1.1539, + "num_input_tokens_seen": 261228076, + "step": 3944 + }, + { + "epoch": 0.3691674076847475, + "loss": 1.1648333072662354, + "loss_ce": 0.005409496836364269, + "loss_iou": 0.5, + "loss_num": 0.0322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 261228076, + "step": 3944 + }, + { + "epoch": 0.36926100996864325, + "grad_norm": 22.333606719970703, + "learning_rate": 5e-05, + "loss": 1.4191, + "num_input_tokens_seen": 261292968, + "step": 3945 + }, + { + "epoch": 0.36926100996864325, + "loss": 1.445363998413086, + "loss_ce": 0.0061549958772957325, + "loss_iou": 0.59375, + "loss_num": 0.05029296875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 261292968, + "step": 3945 + }, + { + "epoch": 0.36935461225253896, + "grad_norm": 38.30065155029297, + "learning_rate": 5e-05, + "loss": 1.3555, + "num_input_tokens_seen": 261359140, + "step": 3946 + }, + { + "epoch": 0.36935461225253896, + "loss": 1.4409079551696777, + "loss_ce": 0.006337637081742287, + "loss_iou": 0.58203125, + "loss_num": 0.053955078125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 261359140, + "step": 3946 + }, + { + "epoch": 0.36944821453643467, + "grad_norm": 24.800479888916016, + "learning_rate": 5e-05, + "loss": 1.5593, + "num_input_tokens_seen": 261425624, + "step": 3947 + }, + { + "epoch": 0.36944821453643467, + "loss": 1.5257058143615723, + "loss_ce": 0.006174529902637005, + "loss_iou": 0.609375, + "loss_num": 0.059326171875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 261425624, + "step": 3947 + }, + { + "epoch": 0.36954181682033044, + "grad_norm": 19.440298080444336, + "learning_rate": 5e-05, + "loss": 1.4042, + "num_input_tokens_seen": 261492212, + "step": 3948 + }, + { + "epoch": 0.36954181682033044, + "loss": 1.3907049894332886, + "loss_ce": 0.007160104811191559, + "loss_iou": 0.52734375, + "loss_num": 0.06494140625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 261492212, + "step": 3948 + }, + { + "epoch": 0.36963541910422615, + "grad_norm": 37.813629150390625, + "learning_rate": 5e-05, + "loss": 1.3949, + "num_input_tokens_seen": 261559560, + "step": 3949 + }, + { + "epoch": 0.36963541910422615, + "loss": 1.3426899909973145, + "loss_ce": 0.0033344775438308716, + "loss_iou": 0.546875, + "loss_num": 0.048583984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 261559560, + "step": 3949 + }, + { + "epoch": 0.36972902138812186, + "grad_norm": 25.93182373046875, + "learning_rate": 5e-05, + "loss": 1.5264, + "num_input_tokens_seen": 261626104, + "step": 3950 + }, + { + "epoch": 0.36972902138812186, + "loss": 1.5038950443267822, + "loss_ce": 0.006824803072959185, + "loss_iou": 0.6171875, + "loss_num": 0.052978515625, + "loss_xval": 1.5, + "num_input_tokens_seen": 261626104, + "step": 3950 + }, + { + "epoch": 0.36982262367201757, + "grad_norm": 11.437479019165039, + "learning_rate": 5e-05, + "loss": 1.0759, + "num_input_tokens_seen": 261691932, + "step": 3951 + }, + { + "epoch": 0.36982262367201757, + "loss": 0.9897888898849487, + "loss_ce": 0.007489139214158058, + "loss_iou": 0.40625, + "loss_num": 0.033447265625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 261691932, + "step": 3951 + }, + { + "epoch": 0.36991622595591334, + "grad_norm": 25.98269271850586, + "learning_rate": 5e-05, + "loss": 1.4724, + "num_input_tokens_seen": 261758472, + "step": 3952 + }, + { + "epoch": 0.36991622595591334, + "loss": 1.594347596168518, + "loss_ce": 0.005480426829308271, + "loss_iou": 0.625, + "loss_num": 0.0673828125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 261758472, + "step": 3952 + }, + { + "epoch": 0.37000982823980905, + "grad_norm": 35.442134857177734, + "learning_rate": 5e-05, + "loss": 1.1802, + "num_input_tokens_seen": 261826040, + "step": 3953 + }, + { + "epoch": 0.37000982823980905, + "loss": 1.3480236530303955, + "loss_ce": 0.007691656239330769, + "loss_iou": 0.55859375, + "loss_num": 0.044189453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 261826040, + "step": 3953 + }, + { + "epoch": 0.37010343052370476, + "grad_norm": 19.181516647338867, + "learning_rate": 5e-05, + "loss": 1.3268, + "num_input_tokens_seen": 261892000, + "step": 3954 + }, + { + "epoch": 0.37010343052370476, + "loss": 1.2413418292999268, + "loss_ce": 0.004525455180555582, + "loss_iou": 0.5078125, + "loss_num": 0.043701171875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 261892000, + "step": 3954 + }, + { + "epoch": 0.3701970328076005, + "grad_norm": 51.0396614074707, + "learning_rate": 5e-05, + "loss": 1.2316, + "num_input_tokens_seen": 261958300, + "step": 3955 + }, + { + "epoch": 0.3701970328076005, + "loss": 1.1863930225372314, + "loss_ce": 0.004019945859909058, + "loss_iou": 0.423828125, + "loss_num": 0.0673828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 261958300, + "step": 3955 + }, + { + "epoch": 0.37029063509149623, + "grad_norm": 31.92195701599121, + "learning_rate": 5e-05, + "loss": 1.1183, + "num_input_tokens_seen": 262024588, + "step": 3956 + }, + { + "epoch": 0.37029063509149623, + "loss": 0.9922685623168945, + "loss_ce": 0.0041093844920396805, + "loss_iou": 0.41015625, + "loss_num": 0.033447265625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 262024588, + "step": 3956 + }, + { + "epoch": 0.37038423737539194, + "grad_norm": 43.49807357788086, + "learning_rate": 5e-05, + "loss": 1.1621, + "num_input_tokens_seen": 262090060, + "step": 3957 + }, + { + "epoch": 0.37038423737539194, + "loss": 1.233185052871704, + "loss_ce": 0.007599052041769028, + "loss_iou": 0.486328125, + "loss_num": 0.051025390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 262090060, + "step": 3957 + }, + { + "epoch": 0.3704778396592877, + "grad_norm": 36.92182540893555, + "learning_rate": 5e-05, + "loss": 1.4606, + "num_input_tokens_seen": 262157232, + "step": 3958 + }, + { + "epoch": 0.3704778396592877, + "loss": 1.4280641078948975, + "loss_ce": 0.006189106963574886, + "loss_iou": 0.546875, + "loss_num": 0.06494140625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 262157232, + "step": 3958 + }, + { + "epoch": 0.3705714419431834, + "grad_norm": 20.42865753173828, + "learning_rate": 5e-05, + "loss": 1.5222, + "num_input_tokens_seen": 262224816, + "step": 3959 + }, + { + "epoch": 0.3705714419431834, + "loss": 1.3759833574295044, + "loss_ce": 0.004889609292149544, + "loss_iou": 0.578125, + "loss_num": 0.043212890625, + "loss_xval": 1.375, + "num_input_tokens_seen": 262224816, + "step": 3959 + }, + { + "epoch": 0.37066504422707913, + "grad_norm": 22.276254653930664, + "learning_rate": 5e-05, + "loss": 1.4538, + "num_input_tokens_seen": 262290680, + "step": 3960 + }, + { + "epoch": 0.37066504422707913, + "loss": 1.602579116821289, + "loss_ce": 0.007364316843450069, + "loss_iou": 0.62890625, + "loss_num": 0.06787109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 262290680, + "step": 3960 + }, + { + "epoch": 0.37075864651097484, + "grad_norm": 45.19758224487305, + "learning_rate": 5e-05, + "loss": 1.6121, + "num_input_tokens_seen": 262357400, + "step": 3961 + }, + { + "epoch": 0.37075864651097484, + "loss": 1.6432125568389893, + "loss_ce": 0.006005595438182354, + "loss_iou": 0.65234375, + "loss_num": 0.06689453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 262357400, + "step": 3961 + }, + { + "epoch": 0.3708522487948706, + "grad_norm": 33.26939392089844, + "learning_rate": 5e-05, + "loss": 1.5507, + "num_input_tokens_seen": 262423420, + "step": 3962 + }, + { + "epoch": 0.3708522487948706, + "loss": 1.7359198331832886, + "loss_ce": 0.006427627522498369, + "loss_iou": 0.7265625, + "loss_num": 0.055419921875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 262423420, + "step": 3962 + }, + { + "epoch": 0.3709458510787663, + "grad_norm": 23.176490783691406, + "learning_rate": 5e-05, + "loss": 1.464, + "num_input_tokens_seen": 262489300, + "step": 3963 + }, + { + "epoch": 0.3709458510787663, + "loss": 1.648054599761963, + "loss_ce": 0.008406110107898712, + "loss_iou": 0.6796875, + "loss_num": 0.056396484375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 262489300, + "step": 3963 + }, + { + "epoch": 0.37103945336266203, + "grad_norm": 29.751508712768555, + "learning_rate": 5e-05, + "loss": 1.4418, + "num_input_tokens_seen": 262556360, + "step": 3964 + }, + { + "epoch": 0.37103945336266203, + "loss": 1.2887781858444214, + "loss_ce": 0.0055750226601958275, + "loss_iou": 0.51953125, + "loss_num": 0.049560546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 262556360, + "step": 3964 + }, + { + "epoch": 0.3711330556465578, + "grad_norm": 22.3258113861084, + "learning_rate": 5e-05, + "loss": 1.1992, + "num_input_tokens_seen": 262622520, + "step": 3965 + }, + { + "epoch": 0.3711330556465578, + "loss": 0.9840934872627258, + "loss_ce": 0.0028922846540808678, + "loss_iou": 0.4296875, + "loss_num": 0.0244140625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 262622520, + "step": 3965 + }, + { + "epoch": 0.3712266579304535, + "grad_norm": 35.426761627197266, + "learning_rate": 5e-05, + "loss": 1.2399, + "num_input_tokens_seen": 262687972, + "step": 3966 + }, + { + "epoch": 0.3712266579304535, + "loss": 1.333057165145874, + "loss_ce": 0.005908791907131672, + "loss_iou": 0.5, + "loss_num": 0.06494140625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 262687972, + "step": 3966 + }, + { + "epoch": 0.3713202602143492, + "grad_norm": 33.05916213989258, + "learning_rate": 5e-05, + "loss": 1.5817, + "num_input_tokens_seen": 262754496, + "step": 3967 + }, + { + "epoch": 0.3713202602143492, + "loss": 1.429577112197876, + "loss_ce": 0.006237310823053122, + "loss_iou": 0.59765625, + "loss_num": 0.046142578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 262754496, + "step": 3967 + }, + { + "epoch": 0.371413862498245, + "grad_norm": 30.960010528564453, + "learning_rate": 5e-05, + "loss": 1.3217, + "num_input_tokens_seen": 262821360, + "step": 3968 + }, + { + "epoch": 0.371413862498245, + "loss": 1.3256900310516357, + "loss_ce": 0.003912715706974268, + "loss_iou": 0.5625, + "loss_num": 0.039794921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 262821360, + "step": 3968 + }, + { + "epoch": 0.3715074647821407, + "grad_norm": 24.339763641357422, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 262888432, + "step": 3969 + }, + { + "epoch": 0.3715074647821407, + "loss": 1.5881636142730713, + "loss_ce": 0.006132287904620171, + "loss_iou": 0.625, + "loss_num": 0.0673828125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 262888432, + "step": 3969 + }, + { + "epoch": 0.3716010670660364, + "grad_norm": 15.922314643859863, + "learning_rate": 5e-05, + "loss": 1.3201, + "num_input_tokens_seen": 262953548, + "step": 3970 + }, + { + "epoch": 0.3716010670660364, + "loss": 1.4539998769760132, + "loss_ce": 0.05067954212427139, + "loss_iou": 0.5078125, + "loss_num": 0.07763671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 262953548, + "step": 3970 + }, + { + "epoch": 0.3716946693499321, + "grad_norm": 33.7575569152832, + "learning_rate": 5e-05, + "loss": 1.0576, + "num_input_tokens_seen": 263019548, + "step": 3971 + }, + { + "epoch": 0.3716946693499321, + "loss": 1.1322962045669556, + "loss_ce": 0.005831355229020119, + "loss_iou": 0.4453125, + "loss_num": 0.046630859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 263019548, + "step": 3971 + }, + { + "epoch": 0.3717882716338279, + "grad_norm": 26.21764373779297, + "learning_rate": 5e-05, + "loss": 1.2614, + "num_input_tokens_seen": 263086704, + "step": 3972 + }, + { + "epoch": 0.3717882716338279, + "loss": 1.081892728805542, + "loss_ce": 0.0047443886287510395, + "loss_iou": 0.46875, + "loss_num": 0.0277099609375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 263086704, + "step": 3972 + }, + { + "epoch": 0.3718818739177236, + "grad_norm": 52.61083984375, + "learning_rate": 5e-05, + "loss": 1.4271, + "num_input_tokens_seen": 263154148, + "step": 3973 + }, + { + "epoch": 0.3718818739177236, + "loss": 1.5574123859405518, + "loss_ce": 0.004678058438003063, + "loss_iou": 0.6484375, + "loss_num": 0.05126953125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 263154148, + "step": 3973 + }, + { + "epoch": 0.3719754762016193, + "grad_norm": 23.409099578857422, + "learning_rate": 5e-05, + "loss": 1.3342, + "num_input_tokens_seen": 263220920, + "step": 3974 + }, + { + "epoch": 0.3719754762016193, + "loss": 1.3142833709716797, + "loss_ce": 0.004713157191872597, + "loss_iou": 0.578125, + "loss_num": 0.0303955078125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 263220920, + "step": 3974 + }, + { + "epoch": 0.37206907848551507, + "grad_norm": 31.830984115600586, + "learning_rate": 5e-05, + "loss": 1.4793, + "num_input_tokens_seen": 263286796, + "step": 3975 + }, + { + "epoch": 0.37206907848551507, + "loss": 1.300828218460083, + "loss_ce": 0.010789182037115097, + "loss_iou": 0.5234375, + "loss_num": 0.048583984375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 263286796, + "step": 3975 + }, + { + "epoch": 0.3721626807694108, + "grad_norm": 43.15821838378906, + "learning_rate": 5e-05, + "loss": 1.1752, + "num_input_tokens_seen": 263353516, + "step": 3976 + }, + { + "epoch": 0.3721626807694108, + "loss": 1.0715612173080444, + "loss_ce": 0.003201832063496113, + "loss_iou": 0.45703125, + "loss_num": 0.0308837890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 263353516, + "step": 3976 + }, + { + "epoch": 0.3722562830533065, + "grad_norm": 29.856653213500977, + "learning_rate": 5e-05, + "loss": 1.1555, + "num_input_tokens_seen": 263419444, + "step": 3977 + }, + { + "epoch": 0.3722562830533065, + "loss": 1.1790707111358643, + "loss_ce": 0.003777771722525358, + "loss_iou": 0.5078125, + "loss_num": 0.031982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 263419444, + "step": 3977 + }, + { + "epoch": 0.3723498853372022, + "grad_norm": 22.400190353393555, + "learning_rate": 5e-05, + "loss": 1.5132, + "num_input_tokens_seen": 263485440, + "step": 3978 + }, + { + "epoch": 0.3723498853372022, + "loss": 1.6392052173614502, + "loss_ce": 0.006392636336386204, + "loss_iou": 0.66796875, + "loss_num": 0.059814453125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 263485440, + "step": 3978 + }, + { + "epoch": 0.37244348762109797, + "grad_norm": 22.372915267944336, + "learning_rate": 5e-05, + "loss": 1.3614, + "num_input_tokens_seen": 263551080, + "step": 3979 + }, + { + "epoch": 0.37244348762109797, + "loss": 1.2551847696304321, + "loss_ce": 0.007137857377529144, + "loss_iou": 0.484375, + "loss_num": 0.055908203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 263551080, + "step": 3979 + }, + { + "epoch": 0.3725370899049937, + "grad_norm": 27.176448822021484, + "learning_rate": 5e-05, + "loss": 1.3504, + "num_input_tokens_seen": 263617276, + "step": 3980 + }, + { + "epoch": 0.3725370899049937, + "loss": 1.3879384994506836, + "loss_ce": 0.006102666258811951, + "loss_iou": 0.57421875, + "loss_num": 0.04638671875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 263617276, + "step": 3980 + }, + { + "epoch": 0.3726306921888894, + "grad_norm": 23.19794464111328, + "learning_rate": 5e-05, + "loss": 1.1345, + "num_input_tokens_seen": 263683032, + "step": 3981 + }, + { + "epoch": 0.3726306921888894, + "loss": 1.2938721179962158, + "loss_ce": 0.002856516744941473, + "loss_iou": 0.5078125, + "loss_num": 0.055419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 263683032, + "step": 3981 + }, + { + "epoch": 0.37272429447278516, + "grad_norm": 33.36949157714844, + "learning_rate": 5e-05, + "loss": 1.1853, + "num_input_tokens_seen": 263748408, + "step": 3982 + }, + { + "epoch": 0.37272429447278516, + "loss": 1.0041017532348633, + "loss_ce": 0.0072755636647343636, + "loss_iou": 0.3984375, + "loss_num": 0.03955078125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 263748408, + "step": 3982 + }, + { + "epoch": 0.37281789675668087, + "grad_norm": 23.538049697875977, + "learning_rate": 5e-05, + "loss": 1.3909, + "num_input_tokens_seen": 263815224, + "step": 3983 + }, + { + "epoch": 0.37281789675668087, + "loss": 1.483205795288086, + "loss_ce": 0.004690231755375862, + "loss_iou": 0.61328125, + "loss_num": 0.05078125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 263815224, + "step": 3983 + }, + { + "epoch": 0.3729114990405766, + "grad_norm": 21.951091766357422, + "learning_rate": 5e-05, + "loss": 1.3645, + "num_input_tokens_seen": 263881316, + "step": 3984 + }, + { + "epoch": 0.3729114990405766, + "loss": 1.4610261917114258, + "loss_ce": 0.005948091857135296, + "loss_iou": 0.55859375, + "loss_num": 0.06689453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 263881316, + "step": 3984 + }, + { + "epoch": 0.37300510132447234, + "grad_norm": 38.7070198059082, + "learning_rate": 5e-05, + "loss": 1.206, + "num_input_tokens_seen": 263946392, + "step": 3985 + }, + { + "epoch": 0.37300510132447234, + "loss": 1.2883522510528564, + "loss_ce": 0.0018533116672188044, + "loss_iou": 0.52734375, + "loss_num": 0.04638671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 263946392, + "step": 3985 + }, + { + "epoch": 0.37309870360836805, + "grad_norm": 18.664636611938477, + "learning_rate": 5e-05, + "loss": 1.2316, + "num_input_tokens_seen": 264011620, + "step": 3986 + }, + { + "epoch": 0.37309870360836805, + "loss": 1.0689376592636108, + "loss_ce": 0.008879079483449459, + "loss_iou": 0.40234375, + "loss_num": 0.05126953125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 264011620, + "step": 3986 + }, + { + "epoch": 0.37319230589226376, + "grad_norm": 19.976411819458008, + "learning_rate": 5e-05, + "loss": 1.0078, + "num_input_tokens_seen": 264077216, + "step": 3987 + }, + { + "epoch": 0.37319230589226376, + "loss": 0.9493486881256104, + "loss_ce": 0.004524484742432833, + "loss_iou": 0.40234375, + "loss_num": 0.0283203125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 264077216, + "step": 3987 + }, + { + "epoch": 0.3732859081761595, + "grad_norm": 41.811859130859375, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 264143472, + "step": 3988 + }, + { + "epoch": 0.3732859081761595, + "loss": 1.242361307144165, + "loss_ce": 0.0045683011412620544, + "loss_iou": 0.49609375, + "loss_num": 0.048828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 264143472, + "step": 3988 + }, + { + "epoch": 0.37337951046005524, + "grad_norm": 15.546930313110352, + "learning_rate": 5e-05, + "loss": 1.1988, + "num_input_tokens_seen": 264209472, + "step": 3989 + }, + { + "epoch": 0.37337951046005524, + "loss": 1.1625902652740479, + "loss_ce": 0.004204002674669027, + "loss_iou": 0.478515625, + "loss_num": 0.04052734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 264209472, + "step": 3989 + }, + { + "epoch": 0.37347311274395095, + "grad_norm": 20.363605499267578, + "learning_rate": 5e-05, + "loss": 1.2522, + "num_input_tokens_seen": 264275612, + "step": 3990 + }, + { + "epoch": 0.37347311274395095, + "loss": 1.4210174083709717, + "loss_ce": 0.010372873395681381, + "loss_iou": 0.5546875, + "loss_num": 0.060302734375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 264275612, + "step": 3990 + }, + { + "epoch": 0.37356671502784666, + "grad_norm": 22.40890884399414, + "learning_rate": 5e-05, + "loss": 1.3464, + "num_input_tokens_seen": 264342076, + "step": 3991 + }, + { + "epoch": 0.37356671502784666, + "loss": 1.2793970108032227, + "loss_ce": 0.009865716099739075, + "loss_iou": 0.50390625, + "loss_num": 0.052734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 264342076, + "step": 3991 + }, + { + "epoch": 0.37366031731174243, + "grad_norm": 40.94968032836914, + "learning_rate": 5e-05, + "loss": 1.2437, + "num_input_tokens_seen": 264408764, + "step": 3992 + }, + { + "epoch": 0.37366031731174243, + "loss": 1.3521018028259277, + "loss_ce": 0.004445582628250122, + "loss_iou": 0.56640625, + "loss_num": 0.04345703125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 264408764, + "step": 3992 + }, + { + "epoch": 0.37375391959563814, + "grad_norm": 28.47822380065918, + "learning_rate": 5e-05, + "loss": 1.7147, + "num_input_tokens_seen": 264474700, + "step": 3993 + }, + { + "epoch": 0.37375391959563814, + "loss": 1.8727054595947266, + "loss_ce": 0.004541396629065275, + "loss_iou": 0.74609375, + "loss_num": 0.0751953125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 264474700, + "step": 3993 + }, + { + "epoch": 0.37384752187953385, + "grad_norm": 15.673739433288574, + "learning_rate": 5e-05, + "loss": 1.272, + "num_input_tokens_seen": 264539576, + "step": 3994 + }, + { + "epoch": 0.37384752187953385, + "loss": 1.1188582181930542, + "loss_ce": 0.006797630339860916, + "loss_iou": 0.447265625, + "loss_num": 0.043701171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 264539576, + "step": 3994 + }, + { + "epoch": 0.37394112416342956, + "grad_norm": 30.9637451171875, + "learning_rate": 5e-05, + "loss": 1.3586, + "num_input_tokens_seen": 264605320, + "step": 3995 + }, + { + "epoch": 0.37394112416342956, + "loss": 1.4542756080627441, + "loss_ce": 0.004080276004970074, + "loss_iou": 0.52734375, + "loss_num": 0.07958984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 264605320, + "step": 3995 + }, + { + "epoch": 0.3740347264473253, + "grad_norm": 22.062971115112305, + "learning_rate": 5e-05, + "loss": 1.4988, + "num_input_tokens_seen": 264672384, + "step": 3996 + }, + { + "epoch": 0.3740347264473253, + "loss": 1.524160623550415, + "loss_ce": 0.0051176357083022594, + "loss_iou": 0.6171875, + "loss_num": 0.057373046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 264672384, + "step": 3996 + }, + { + "epoch": 0.37412832873122104, + "grad_norm": 82.78164672851562, + "learning_rate": 5e-05, + "loss": 1.3324, + "num_input_tokens_seen": 264737884, + "step": 3997 + }, + { + "epoch": 0.37412832873122104, + "loss": 1.3954143524169922, + "loss_ce": 0.00527762807905674, + "loss_iou": 0.515625, + "loss_num": 0.0712890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 264737884, + "step": 3997 + }, + { + "epoch": 0.37422193101511675, + "grad_norm": 25.055173873901367, + "learning_rate": 5e-05, + "loss": 1.132, + "num_input_tokens_seen": 264804800, + "step": 3998 + }, + { + "epoch": 0.37422193101511675, + "loss": 1.2982065677642822, + "loss_ce": 0.004749562591314316, + "loss_iou": 0.5546875, + "loss_num": 0.0361328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 264804800, + "step": 3998 + }, + { + "epoch": 0.3743155332990125, + "grad_norm": 36.58376693725586, + "learning_rate": 5e-05, + "loss": 1.3056, + "num_input_tokens_seen": 264869108, + "step": 3999 + }, + { + "epoch": 0.3743155332990125, + "loss": 1.1527501344680786, + "loss_ce": 0.005533342249691486, + "loss_iou": 0.52734375, + "loss_num": 0.018798828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 264869108, + "step": 3999 + }, + { + "epoch": 0.3744091355829082, + "grad_norm": 54.213768005371094, + "learning_rate": 5e-05, + "loss": 1.8184, + "num_input_tokens_seen": 264936296, + "step": 4000 + }, + { + "epoch": 0.3744091355829082, + "eval_seeclick_CIoU": 0.15379706770181656, + "eval_seeclick_GIoU": 0.1632080227136612, + "eval_seeclick_IoU": 0.2842213287949562, + "eval_seeclick_MAE_all": 0.17822610586881638, + "eval_seeclick_MAE_h": 0.11929627135396004, + "eval_seeclick_MAE_w": 0.11855470016598701, + "eval_seeclick_MAE_x_boxes": 0.25859377533197403, + "eval_seeclick_MAE_y_boxes": 0.1552606150507927, + "eval_seeclick_NUM_probability": 0.9998007714748383, + "eval_seeclick_inside_bbox": 0.3895833343267441, + "eval_seeclick_loss": 2.629326581954956, + "eval_seeclick_loss_ce": 0.014735812786966562, + "eval_seeclick_loss_iou": 0.890380859375, + "eval_seeclick_loss_num": 0.170318603515625, + "eval_seeclick_loss_xval": 2.63330078125, + "eval_seeclick_runtime": 72.9953, + "eval_seeclick_samples_per_second": 0.644, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 264936296, + "step": 4000 + }, + { + "epoch": 0.3744091355829082, + "eval_icons_CIoU": -0.09496079757809639, + "eval_icons_GIoU": 0.035429807379841805, + "eval_icons_IoU": 0.1063428670167923, + "eval_icons_MAE_all": 0.20099008828401566, + "eval_icons_MAE_h": 0.18222493678331375, + "eval_icons_MAE_w": 0.1513308808207512, + "eval_icons_MAE_x_boxes": 0.16003187745809555, + "eval_icons_MAE_y_boxes": 0.127197178080678, + "eval_icons_NUM_probability": 0.9998990595340729, + "eval_icons_inside_bbox": 0.1649305559694767, + "eval_icons_loss": 2.8375203609466553, + "eval_icons_loss_ce": 2.2454639292845968e-05, + "eval_icons_loss_iou": 0.950927734375, + "eval_icons_loss_num": 0.198516845703125, + "eval_icons_loss_xval": 2.89208984375, + "eval_icons_runtime": 73.6202, + "eval_icons_samples_per_second": 0.679, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 264936296, + "step": 4000 + }, + { + "epoch": 0.3744091355829082, + "eval_screenspot_CIoU": -0.019918086628119152, + "eval_screenspot_GIoU": 0.013222339873512587, + "eval_screenspot_IoU": 0.15667323768138885, + "eval_screenspot_MAE_all": 0.21852064629395804, + "eval_screenspot_MAE_h": 0.1598268449306488, + "eval_screenspot_MAE_w": 0.18928087254365286, + "eval_screenspot_MAE_x_boxes": 0.29260844985644024, + "eval_screenspot_MAE_y_boxes": 0.14141333103179932, + "eval_screenspot_NUM_probability": 0.9999316533406576, + "eval_screenspot_inside_bbox": 0.3254166642824809, + "eval_screenspot_loss": 3.0932860374450684, + "eval_screenspot_loss_ce": 0.010087936495741209, + "eval_screenspot_loss_iou": 0.9951171875, + "eval_screenspot_loss_num": 0.22828165690104166, + "eval_screenspot_loss_xval": 3.1318359375, + "eval_screenspot_runtime": 126.8978, + "eval_screenspot_samples_per_second": 0.701, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 264936296, + "step": 4000 + }, + { + "epoch": 0.3744091355829082, + "eval_compot_CIoU": -0.0741785280406475, + "eval_compot_GIoU": 0.020535959862172604, + "eval_compot_IoU": 0.1116841621696949, + "eval_compot_MAE_all": 0.2555025890469551, + "eval_compot_MAE_h": 0.18729552626609802, + "eval_compot_MAE_w": 0.2644929438829422, + "eval_compot_MAE_x_boxes": 0.21654167771339417, + "eval_compot_MAE_y_boxes": 0.12299535050988197, + "eval_compot_NUM_probability": 0.9999019503593445, + "eval_compot_inside_bbox": 0.1892361119389534, + "eval_compot_loss": 3.307427406311035, + "eval_compot_loss_ce": 0.00394441606476903, + "eval_compot_loss_iou": 1.0068359375, + "eval_compot_loss_num": 0.2715911865234375, + "eval_compot_loss_xval": 3.3720703125, + "eval_compot_runtime": 70.3304, + "eval_compot_samples_per_second": 0.711, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 264936296, + "step": 4000 + }, + { + "epoch": 0.3744091355829082, + "eval_custom_ui_MAE_all": 0.147480309009552, + "eval_custom_ui_MAE_x": 0.15427882224321365, + "eval_custom_ui_MAE_y": 0.14068179205060005, + "eval_custom_ui_NUM_probability": 0.9999688565731049, + "eval_custom_ui_loss": 0.8656248450279236, + "eval_custom_ui_loss_ce": 0.1458643600344658, + "eval_custom_ui_loss_num": 0.1512298583984375, + "eval_custom_ui_loss_xval": 0.756591796875, + "eval_custom_ui_runtime": 55.7567, + "eval_custom_ui_samples_per_second": 0.897, + "eval_custom_ui_steps_per_second": 0.036, + "num_input_tokens_seen": 264936296, + "step": 4000 + } + ], + "logging_steps": 1.0, + "max_steps": 10683, + "num_input_tokens_seen": 264936296, + "num_train_epochs": 1, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2325338094847918e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}