diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36917 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.655574043261232, + "eval_steps": 250, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0033277870216306157, + "grad_norm": 99.72433471679688, + "learning_rate": 5e-06, + "loss": 2.2579, + "num_input_tokens_seen": 62524, + "step": 1 + }, + { + "epoch": 0.0033277870216306157, + "loss": 2.4882845878601074, + "loss_ce": 0.4775424301624298, + "loss_iou": 0.55859375, + "loss_num": 0.177734375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 62524, + "step": 1 + }, + { + "epoch": 0.0066555740432612314, + "grad_norm": 43.31109619140625, + "learning_rate": 5e-06, + "loss": 1.8354, + "num_input_tokens_seen": 124728, + "step": 2 + }, + { + "epoch": 0.0066555740432612314, + "loss": 1.5098202228546143, + "loss_ce": 0.0459531769156456, + "loss_iou": 0.51953125, + "loss_num": 0.0849609375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 124728, + "step": 2 + }, + { + "epoch": 0.009983361064891847, + "grad_norm": 40.23197555541992, + "learning_rate": 5e-06, + "loss": 1.8923, + "num_input_tokens_seen": 187516, + "step": 3 + }, + { + "epoch": 0.009983361064891847, + "loss": 1.9805556535720825, + "loss_ce": 0.46248918771743774, + "loss_iou": 0.4375, + "loss_num": 0.12890625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 187516, + "step": 3 + }, + { + "epoch": 0.013311148086522463, + "grad_norm": 41.29404067993164, + "learning_rate": 5e-06, + "loss": 1.8433, + "num_input_tokens_seen": 249256, + "step": 4 + }, + { + "epoch": 0.013311148086522463, + "loss": 1.760237216949463, + "loss_ce": 0.29978805780410767, + "loss_iou": 0.474609375, + "loss_num": 0.10205078125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 249256, + "step": 4 + }, + { + "epoch": 0.016638935108153077, + "grad_norm": 45.62328338623047, + "learning_rate": 5e-06, + "loss": 1.9853, + "num_input_tokens_seen": 312896, + "step": 5 + }, + { + "epoch": 0.016638935108153077, + "loss": 1.6446335315704346, + "loss_ce": 0.36777815222740173, + "loss_iou": 0.38671875, + "loss_num": 0.1005859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 312896, + "step": 5 + }, + { + "epoch": 0.019966722129783693, + "grad_norm": 26.73392677307129, + "learning_rate": 5e-06, + "loss": 1.5267, + "num_input_tokens_seen": 373800, + "step": 6 + }, + { + "epoch": 0.019966722129783693, + "loss": 1.481225848197937, + "loss_ce": 0.186303973197937, + "loss_iou": 0.453125, + "loss_num": 0.0771484375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 373800, + "step": 6 + }, + { + "epoch": 0.02329450915141431, + "grad_norm": 28.28041648864746, + "learning_rate": 5e-06, + "loss": 1.8401, + "num_input_tokens_seen": 435532, + "step": 7 + }, + { + "epoch": 0.02329450915141431, + "loss": 1.7152678966522217, + "loss_ce": 0.2172209620475769, + "loss_iou": 0.3515625, + "loss_num": 0.158203125, + "loss_xval": 1.5, + "num_input_tokens_seen": 435532, + "step": 7 + }, + { + "epoch": 0.026622296173044926, + "grad_norm": 93.13752746582031, + "learning_rate": 5e-06, + "loss": 1.7189, + "num_input_tokens_seen": 498240, + "step": 8 + }, + { + "epoch": 0.026622296173044926, + "loss": 1.7084490060806274, + "loss_ce": 0.12007011473178864, + "loss_iou": 0.49609375, + "loss_num": 0.11865234375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 498240, + "step": 8 + }, + { + "epoch": 0.029950083194675542, + "grad_norm": 55.754974365234375, + "learning_rate": 5e-06, + "loss": 1.9588, + "num_input_tokens_seen": 560944, + "step": 9 + }, + { + "epoch": 0.029950083194675542, + "loss": 1.9288438558578491, + "loss_ce": 0.43592390418052673, + "loss_iou": 0.376953125, + "loss_num": 0.1474609375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 560944, + "step": 9 + }, + { + "epoch": 0.033277870216306155, + "grad_norm": 24.77700424194336, + "learning_rate": 5e-06, + "loss": 1.7834, + "num_input_tokens_seen": 623276, + "step": 10 + }, + { + "epoch": 0.033277870216306155, + "loss": 1.2600505352020264, + "loss_ce": 0.17655442655086517, + "loss_iou": 0.169921875, + "loss_num": 0.1494140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 623276, + "step": 10 + }, + { + "epoch": 0.036605657237936774, + "grad_norm": 53.40562438964844, + "learning_rate": 5e-06, + "loss": 1.9654, + "num_input_tokens_seen": 686344, + "step": 11 + }, + { + "epoch": 0.036605657237936774, + "loss": 1.5656721591949463, + "loss_ce": 0.15307454764842987, + "loss_iou": 0.32421875, + "loss_num": 0.15234375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 686344, + "step": 11 + }, + { + "epoch": 0.03993344425956739, + "grad_norm": 185.1483612060547, + "learning_rate": 5e-06, + "loss": 1.5234, + "num_input_tokens_seen": 748796, + "step": 12 + }, + { + "epoch": 0.03993344425956739, + "loss": 0.9671105146408081, + "loss_ce": 0.1262902468442917, + "loss_iou": 0.0, + "loss_num": 0.16796875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 748796, + "step": 12 + }, + { + "epoch": 0.04326123128119801, + "grad_norm": 41.721710205078125, + "learning_rate": 5e-06, + "loss": 2.2144, + "num_input_tokens_seen": 811248, + "step": 13 + }, + { + "epoch": 0.04326123128119801, + "loss": 1.7121745347976685, + "loss_ce": 0.382096529006958, + "loss_iou": 0.34765625, + "loss_num": 0.126953125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 811248, + "step": 13 + }, + { + "epoch": 0.04658901830282862, + "grad_norm": 26.450149536132812, + "learning_rate": 5e-06, + "loss": 2.0257, + "num_input_tokens_seen": 874336, + "step": 14 + }, + { + "epoch": 0.04658901830282862, + "loss": 2.167698383331299, + "loss_ce": 0.07883133739233017, + "loss_iou": 0.65234375, + "loss_num": 0.15625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 874336, + "step": 14 + }, + { + "epoch": 0.04991680532445923, + "grad_norm": 28.79014015197754, + "learning_rate": 5e-06, + "loss": 1.5748, + "num_input_tokens_seen": 934104, + "step": 15 + }, + { + "epoch": 0.04991680532445923, + "loss": 1.746118426322937, + "loss_ce": 0.3081301152706146, + "loss_iou": 0.34375, + "loss_num": 0.150390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 934104, + "step": 15 + }, + { + "epoch": 0.05324459234608985, + "grad_norm": 35.701480865478516, + "learning_rate": 5e-06, + "loss": 1.6555, + "num_input_tokens_seen": 995740, + "step": 16 + }, + { + "epoch": 0.05324459234608985, + "loss": 1.4585609436035156, + "loss_ce": 0.164127379655838, + "loss_iou": 0.318359375, + "loss_num": 0.1318359375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 995740, + "step": 16 + }, + { + "epoch": 0.056572379367720464, + "grad_norm": 31.904708862304688, + "learning_rate": 5e-06, + "loss": 1.4605, + "num_input_tokens_seen": 1057552, + "step": 17 + }, + { + "epoch": 0.056572379367720464, + "loss": 1.500415563583374, + "loss_ce": 0.4437748193740845, + "loss_iou": 0.26171875, + "loss_num": 0.10693359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 1057552, + "step": 17 + }, + { + "epoch": 0.059900166389351084, + "grad_norm": 26.225719451904297, + "learning_rate": 5e-06, + "loss": 1.8686, + "num_input_tokens_seen": 1117304, + "step": 18 + }, + { + "epoch": 0.059900166389351084, + "loss": 2.0437541007995605, + "loss_ce": 0.25664472579956055, + "loss_iou": 0.546875, + "loss_num": 0.1376953125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 1117304, + "step": 18 + }, + { + "epoch": 0.0632279534109817, + "grad_norm": 107.4391098022461, + "learning_rate": 5e-06, + "loss": 1.8643, + "num_input_tokens_seen": 1180152, + "step": 19 + }, + { + "epoch": 0.0632279534109817, + "loss": 1.5854601860046387, + "loss_ce": 0.12549912929534912, + "loss_iou": 0.359375, + "loss_num": 0.1484375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 1180152, + "step": 19 + }, + { + "epoch": 0.06655574043261231, + "grad_norm": 31.948070526123047, + "learning_rate": 5e-06, + "loss": 1.6704, + "num_input_tokens_seen": 1240584, + "step": 20 + }, + { + "epoch": 0.06655574043261231, + "loss": 1.782106876373291, + "loss_ce": 0.6380637884140015, + "loss_iou": 0.203125, + "loss_num": 0.1474609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 1240584, + "step": 20 + }, + { + "epoch": 0.06988352745424292, + "grad_norm": 39.34145736694336, + "learning_rate": 5e-06, + "loss": 1.8904, + "num_input_tokens_seen": 1301508, + "step": 21 + }, + { + "epoch": 0.06988352745424292, + "loss": 1.6883811950683594, + "loss_ce": 0.6380882263183594, + "loss_iou": 0.2470703125, + "loss_num": 0.111328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 1301508, + "step": 21 + }, + { + "epoch": 0.07321131447587355, + "grad_norm": 28.05674934387207, + "learning_rate": 5e-06, + "loss": 2.0444, + "num_input_tokens_seen": 1363652, + "step": 22 + }, + { + "epoch": 0.07321131447587355, + "loss": 2.08927845954895, + "loss_ce": 0.1547081470489502, + "loss_iou": 0.5625, + "loss_num": 0.162109375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 1363652, + "step": 22 + }, + { + "epoch": 0.07653910149750416, + "grad_norm": 34.66334915161133, + "learning_rate": 5e-06, + "loss": 2.0099, + "num_input_tokens_seen": 1427244, + "step": 23 + }, + { + "epoch": 0.07653910149750416, + "loss": 2.1946048736572266, + "loss_ce": 0.20241737365722656, + "loss_iou": 0.640625, + "loss_num": 0.142578125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 1427244, + "step": 23 + }, + { + "epoch": 0.07986688851913477, + "grad_norm": 16.318044662475586, + "learning_rate": 5e-06, + "loss": 1.6373, + "num_input_tokens_seen": 1488232, + "step": 24 + }, + { + "epoch": 0.07986688851913477, + "loss": 1.2525696754455566, + "loss_ce": 0.21619272232055664, + "loss_iou": 0.251953125, + "loss_num": 0.1064453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 1488232, + "step": 24 + }, + { + "epoch": 0.08319467554076539, + "grad_norm": 22.026212692260742, + "learning_rate": 5e-06, + "loss": 1.8012, + "num_input_tokens_seen": 1549668, + "step": 25 + }, + { + "epoch": 0.08319467554076539, + "loss": 1.860095500946045, + "loss_ce": 0.44945091009140015, + "loss_iou": 0.30859375, + "loss_num": 0.1591796875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 1549668, + "step": 25 + }, + { + "epoch": 0.08652246256239601, + "grad_norm": 26.607149124145508, + "learning_rate": 5e-06, + "loss": 1.898, + "num_input_tokens_seen": 1612748, + "step": 26 + }, + { + "epoch": 0.08652246256239601, + "loss": 1.7494425773620605, + "loss_ce": 0.19084876775741577, + "loss_iou": 0.4375, + "loss_num": 0.13671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 1612748, + "step": 26 + }, + { + "epoch": 0.08985024958402663, + "grad_norm": 100.14049530029297, + "learning_rate": 5e-06, + "loss": 1.9263, + "num_input_tokens_seen": 1675456, + "step": 27 + }, + { + "epoch": 0.08985024958402663, + "loss": 1.7160944938659668, + "loss_ce": 0.20193445682525635, + "loss_iou": 0.5078125, + "loss_num": 0.099609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 1675456, + "step": 27 + }, + { + "epoch": 0.09317803660565724, + "grad_norm": 31.630752563476562, + "learning_rate": 5e-06, + "loss": 2.0261, + "num_input_tokens_seen": 1736840, + "step": 28 + }, + { + "epoch": 0.09317803660565724, + "loss": 1.7389755249023438, + "loss_ce": 0.17842870950698853, + "loss_iou": 0.42578125, + "loss_num": 0.1416015625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 1736840, + "step": 28 + }, + { + "epoch": 0.09650582362728785, + "grad_norm": 23.318147659301758, + "learning_rate": 5e-06, + "loss": 1.7689, + "num_input_tokens_seen": 1799508, + "step": 29 + }, + { + "epoch": 0.09650582362728785, + "loss": 1.8038142919540405, + "loss_ce": 0.46494707465171814, + "loss_iou": 0.357421875, + "loss_num": 0.12451171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 1799508, + "step": 29 + }, + { + "epoch": 0.09983361064891846, + "grad_norm": 20.128877639770508, + "learning_rate": 5e-06, + "loss": 2.0295, + "num_input_tokens_seen": 1862340, + "step": 30 + }, + { + "epoch": 0.09983361064891846, + "loss": 2.387416362762451, + "loss_ce": 0.5177874565124512, + "loss_iou": 0.53125, + "loss_num": 0.162109375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 1862340, + "step": 30 + }, + { + "epoch": 0.10316139767054909, + "grad_norm": 102.55489349365234, + "learning_rate": 5e-06, + "loss": 1.695, + "num_input_tokens_seen": 1926308, + "step": 31 + }, + { + "epoch": 0.10316139767054909, + "loss": 1.4592796564102173, + "loss_ce": 0.12383048981428146, + "loss_iou": 0.447265625, + "loss_num": 0.0888671875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 1926308, + "step": 31 + }, + { + "epoch": 0.1064891846921797, + "grad_norm": 24.122255325317383, + "learning_rate": 5e-06, + "loss": 1.5715, + "num_input_tokens_seen": 1987296, + "step": 32 + }, + { + "epoch": 0.1064891846921797, + "loss": 1.2685531377792358, + "loss_ce": 0.11279135942459106, + "loss_iou": 0.1845703125, + "loss_num": 0.1572265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 1987296, + "step": 32 + }, + { + "epoch": 0.10981697171381032, + "grad_norm": 29.521120071411133, + "learning_rate": 5e-06, + "loss": 1.9234, + "num_input_tokens_seen": 2049684, + "step": 33 + }, + { + "epoch": 0.10981697171381032, + "loss": 2.4738051891326904, + "loss_ce": 0.3732193112373352, + "loss_iou": 0.62109375, + "loss_num": 0.171875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 2049684, + "step": 33 + }, + { + "epoch": 0.11314475873544093, + "grad_norm": 33.83547592163086, + "learning_rate": 5e-06, + "loss": 2.2031, + "num_input_tokens_seen": 2112524, + "step": 34 + }, + { + "epoch": 0.11314475873544093, + "loss": 2.4939706325531006, + "loss_ce": 0.40705662965774536, + "loss_iou": 0.63671875, + "loss_num": 0.162109375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 2112524, + "step": 34 + }, + { + "epoch": 0.11647254575707154, + "grad_norm": 28.041799545288086, + "learning_rate": 5e-06, + "loss": 1.9234, + "num_input_tokens_seen": 2174764, + "step": 35 + }, + { + "epoch": 0.11647254575707154, + "loss": 2.1027140617370605, + "loss_ce": 0.321463942527771, + "loss_iou": 0.404296875, + "loss_num": 0.1953125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 2174764, + "step": 35 + }, + { + "epoch": 0.11980033277870217, + "grad_norm": 16.05351448059082, + "learning_rate": 5e-06, + "loss": 1.6234, + "num_input_tokens_seen": 2236936, + "step": 36 + }, + { + "epoch": 0.11980033277870217, + "loss": 1.9183712005615234, + "loss_ce": 0.2826289236545563, + "loss_iou": 0.53515625, + "loss_num": 0.11376953125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 2236936, + "step": 36 + }, + { + "epoch": 0.12312811980033278, + "grad_norm": 25.214027404785156, + "learning_rate": 5e-06, + "loss": 1.7429, + "num_input_tokens_seen": 2297968, + "step": 37 + }, + { + "epoch": 0.12312811980033278, + "loss": 1.8933910131454468, + "loss_ce": 0.352375328540802, + "loss_iou": 0.453125, + "loss_num": 0.126953125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 2297968, + "step": 37 + }, + { + "epoch": 0.1264559068219634, + "grad_norm": 41.4306526184082, + "learning_rate": 5e-06, + "loss": 1.9087, + "num_input_tokens_seen": 2359480, + "step": 38 + }, + { + "epoch": 0.1264559068219634, + "loss": 1.9372410774230957, + "loss_ce": 0.3669285476207733, + "loss_iou": 0.44921875, + "loss_num": 0.134765625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 2359480, + "step": 38 + }, + { + "epoch": 0.129783693843594, + "grad_norm": 87.06895446777344, + "learning_rate": 5e-06, + "loss": 1.4617, + "num_input_tokens_seen": 2422388, + "step": 39 + }, + { + "epoch": 0.129783693843594, + "loss": 1.5843279361724854, + "loss_ce": 0.26157402992248535, + "loss_iou": 0.408203125, + "loss_num": 0.10107421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 2422388, + "step": 39 + }, + { + "epoch": 0.13311148086522462, + "grad_norm": 22.859128952026367, + "learning_rate": 5e-06, + "loss": 1.7171, + "num_input_tokens_seen": 2485508, + "step": 40 + }, + { + "epoch": 0.13311148086522462, + "loss": 1.6260865926742554, + "loss_ce": 0.1578247845172882, + "loss_iou": 0.376953125, + "loss_num": 0.142578125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 2485508, + "step": 40 + }, + { + "epoch": 0.13643926788685523, + "grad_norm": 29.294437408447266, + "learning_rate": 5e-06, + "loss": 1.6604, + "num_input_tokens_seen": 2547740, + "step": 41 + }, + { + "epoch": 0.13643926788685523, + "loss": 1.5736039876937866, + "loss_ce": 0.11828167736530304, + "loss_iou": 0.384765625, + "loss_num": 0.1376953125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 2547740, + "step": 41 + }, + { + "epoch": 0.13976705490848584, + "grad_norm": 22.002099990844727, + "learning_rate": 5e-06, + "loss": 1.9157, + "num_input_tokens_seen": 2610656, + "step": 42 + }, + { + "epoch": 0.13976705490848584, + "loss": 2.026005744934082, + "loss_ce": 0.26331043243408203, + "loss_iou": 0.515625, + "loss_num": 0.146484375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 2610656, + "step": 42 + }, + { + "epoch": 0.14309484193011648, + "grad_norm": 42.256507873535156, + "learning_rate": 5e-06, + "loss": 1.9787, + "num_input_tokens_seen": 2671692, + "step": 43 + }, + { + "epoch": 0.14309484193011648, + "loss": 1.8853733539581299, + "loss_ce": 0.2652561664581299, + "loss_iou": 0.50390625, + "loss_num": 0.12158203125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 2671692, + "step": 43 + }, + { + "epoch": 0.1464226289517471, + "grad_norm": 23.313539505004883, + "learning_rate": 5e-06, + "loss": 1.97, + "num_input_tokens_seen": 2734876, + "step": 44 + }, + { + "epoch": 0.1464226289517471, + "loss": 2.3043510913848877, + "loss_ce": 0.4186089336872101, + "loss_iou": 0.58203125, + "loss_num": 0.1435546875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 2734876, + "step": 44 + }, + { + "epoch": 0.1497504159733777, + "grad_norm": 19.345558166503906, + "learning_rate": 5e-06, + "loss": 1.9084, + "num_input_tokens_seen": 2798924, + "step": 45 + }, + { + "epoch": 0.1497504159733777, + "loss": 1.7593090534210205, + "loss_ce": 0.1675121784210205, + "loss_iou": 0.50390625, + "loss_num": 0.1171875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 2798924, + "step": 45 + }, + { + "epoch": 0.15307820299500832, + "grad_norm": 56.2034912109375, + "learning_rate": 5e-06, + "loss": 1.5545, + "num_input_tokens_seen": 2862396, + "step": 46 + }, + { + "epoch": 0.15307820299500832, + "loss": 1.1661244630813599, + "loss_ce": 0.09337049722671509, + "loss_iou": 0.306640625, + "loss_num": 0.091796875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 2862396, + "step": 46 + }, + { + "epoch": 0.15640599001663893, + "grad_norm": 15.956643104553223, + "learning_rate": 5e-06, + "loss": 1.6658, + "num_input_tokens_seen": 2926024, + "step": 47 + }, + { + "epoch": 0.15640599001663893, + "loss": 1.5099748373031616, + "loss_ce": 0.054896753281354904, + "loss_iou": 0.50390625, + "loss_num": 0.08984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 2926024, + "step": 47 + }, + { + "epoch": 0.15973377703826955, + "grad_norm": 25.402143478393555, + "learning_rate": 5e-06, + "loss": 1.8769, + "num_input_tokens_seen": 2989532, + "step": 48 + }, + { + "epoch": 0.15973377703826955, + "loss": 1.6056416034698486, + "loss_ce": 0.17790716886520386, + "loss_iou": 0.44140625, + "loss_num": 0.10888671875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 2989532, + "step": 48 + }, + { + "epoch": 0.16306156405990016, + "grad_norm": 26.91611671447754, + "learning_rate": 5e-06, + "loss": 1.8648, + "num_input_tokens_seen": 3052660, + "step": 49 + }, + { + "epoch": 0.16306156405990016, + "loss": 1.767176866531372, + "loss_ce": 0.14803630113601685, + "loss_iou": 0.515625, + "loss_num": 0.1181640625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 3052660, + "step": 49 + }, + { + "epoch": 0.16638935108153077, + "grad_norm": 17.735488891601562, + "learning_rate": 5e-06, + "loss": 1.733, + "num_input_tokens_seen": 3114048, + "step": 50 + }, + { + "epoch": 0.16638935108153077, + "loss": 1.8018484115600586, + "loss_ce": 0.201262429356575, + "loss_iou": 0.369140625, + "loss_num": 0.171875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 3114048, + "step": 50 + }, + { + "epoch": 0.16971713810316139, + "grad_norm": 23.010356903076172, + "learning_rate": 5e-06, + "loss": 1.7252, + "num_input_tokens_seen": 3177028, + "step": 51 + }, + { + "epoch": 0.16971713810316139, + "loss": 1.9260194301605225, + "loss_ce": 0.23461312055587769, + "loss_iou": 0.5390625, + "loss_num": 0.1220703125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 3177028, + "step": 51 + }, + { + "epoch": 0.17304492512479203, + "grad_norm": 50.80724334716797, + "learning_rate": 5e-06, + "loss": 1.5932, + "num_input_tokens_seen": 3240048, + "step": 52 + }, + { + "epoch": 0.17304492512479203, + "loss": 1.2517362833023071, + "loss_ce": 0.07107216864824295, + "loss_iou": 0.3046875, + "loss_num": 0.1142578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 3240048, + "step": 52 + }, + { + "epoch": 0.17637271214642264, + "grad_norm": 19.88364601135254, + "learning_rate": 5e-06, + "loss": 1.7949, + "num_input_tokens_seen": 3302084, + "step": 53 + }, + { + "epoch": 0.17637271214642264, + "loss": 1.9920969009399414, + "loss_ce": 0.1786203682422638, + "loss_iou": 0.5703125, + "loss_num": 0.1337890625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 3302084, + "step": 53 + }, + { + "epoch": 0.17970049916805325, + "grad_norm": 15.960768699645996, + "learning_rate": 5e-06, + "loss": 1.8404, + "num_input_tokens_seen": 3364032, + "step": 54 + }, + { + "epoch": 0.17970049916805325, + "loss": 1.923379898071289, + "loss_ce": 0.3872470259666443, + "loss_iou": 0.455078125, + "loss_num": 0.125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 3364032, + "step": 54 + }, + { + "epoch": 0.18302828618968386, + "grad_norm": 34.17069625854492, + "learning_rate": 5e-06, + "loss": 1.6571, + "num_input_tokens_seen": 3426944, + "step": 55 + }, + { + "epoch": 0.18302828618968386, + "loss": 1.422314167022705, + "loss_ce": 0.04633765295147896, + "loss_iou": 0.37109375, + "loss_num": 0.126953125, + "loss_xval": 1.375, + "num_input_tokens_seen": 3426944, + "step": 55 + }, + { + "epoch": 0.18635607321131448, + "grad_norm": 14.88204288482666, + "learning_rate": 5e-06, + "loss": 1.7171, + "num_input_tokens_seen": 3488980, + "step": 56 + }, + { + "epoch": 0.18635607321131448, + "loss": 1.6802122592926025, + "loss_ce": 0.12698963284492493, + "loss_iou": 0.46484375, + "loss_num": 0.1240234375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 3488980, + "step": 56 + }, + { + "epoch": 0.1896838602329451, + "grad_norm": 25.315814971923828, + "learning_rate": 5e-06, + "loss": 1.5849, + "num_input_tokens_seen": 3552192, + "step": 57 + }, + { + "epoch": 0.1896838602329451, + "loss": 1.5455249547958374, + "loss_ce": 0.09777102619409561, + "loss_iou": 0.4609375, + "loss_num": 0.10498046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 3552192, + "step": 57 + }, + { + "epoch": 0.1930116472545757, + "grad_norm": 52.65762710571289, + "learning_rate": 5e-06, + "loss": 1.7184, + "num_input_tokens_seen": 3614784, + "step": 58 + }, + { + "epoch": 0.1930116472545757, + "loss": 1.7533233165740967, + "loss_ce": 0.1888701170682907, + "loss_iou": 0.462890625, + "loss_num": 0.1279296875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 3614784, + "step": 58 + }, + { + "epoch": 0.19633943427620631, + "grad_norm": 10.436413764953613, + "learning_rate": 5e-06, + "loss": 1.5083, + "num_input_tokens_seen": 3676144, + "step": 59 + }, + { + "epoch": 0.19633943427620631, + "loss": 1.4122167825698853, + "loss_ce": 0.03331056609749794, + "loss_iou": 0.3359375, + "loss_num": 0.1416015625, + "loss_xval": 1.375, + "num_input_tokens_seen": 3676144, + "step": 59 + }, + { + "epoch": 0.19966722129783693, + "grad_norm": 23.417434692382812, + "learning_rate": 5e-06, + "loss": 1.513, + "num_input_tokens_seen": 3740152, + "step": 60 + }, + { + "epoch": 0.19966722129783693, + "loss": 1.52932870388031, + "loss_ce": 0.15628191828727722, + "loss_iou": 0.4453125, + "loss_num": 0.0966796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 3740152, + "step": 60 + }, + { + "epoch": 0.20299500831946754, + "grad_norm": 57.756534576416016, + "learning_rate": 5e-06, + "loss": 1.9057, + "num_input_tokens_seen": 3803536, + "step": 61 + }, + { + "epoch": 0.20299500831946754, + "loss": 2.161196708679199, + "loss_ce": 0.07525897026062012, + "loss_iou": 0.6328125, + "loss_num": 0.1640625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 3803536, + "step": 61 + }, + { + "epoch": 0.20632279534109818, + "grad_norm": 34.947166442871094, + "learning_rate": 5e-06, + "loss": 1.5023, + "num_input_tokens_seen": 3866352, + "step": 62 + }, + { + "epoch": 0.20632279534109818, + "loss": 1.5223082304000854, + "loss_ce": 0.03939810022711754, + "loss_iou": 0.4453125, + "loss_num": 0.11865234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 3866352, + "step": 62 + }, + { + "epoch": 0.2096505823627288, + "grad_norm": 27.8328800201416, + "learning_rate": 5e-06, + "loss": 1.6887, + "num_input_tokens_seen": 3929720, + "step": 63 + }, + { + "epoch": 0.2096505823627288, + "loss": 1.6723523139953613, + "loss_ce": 0.13377803564071655, + "loss_iou": 0.43359375, + "loss_num": 0.1337890625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 3929720, + "step": 63 + }, + { + "epoch": 0.2129783693843594, + "grad_norm": 28.632810592651367, + "learning_rate": 5e-06, + "loss": 1.8225, + "num_input_tokens_seen": 3993200, + "step": 64 + }, + { + "epoch": 0.2129783693843594, + "loss": 2.061736583709717, + "loss_ce": 0.2531428337097168, + "loss_iou": 0.5703125, + "loss_num": 0.1328125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 3993200, + "step": 64 + }, + { + "epoch": 0.21630615640599002, + "grad_norm": 26.3468074798584, + "learning_rate": 5e-06, + "loss": 1.5015, + "num_input_tokens_seen": 4055156, + "step": 65 + }, + { + "epoch": 0.21630615640599002, + "loss": 1.671527624130249, + "loss_ce": 0.03480884060263634, + "loss_iou": 0.470703125, + "loss_num": 0.1396484375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 4055156, + "step": 65 + }, + { + "epoch": 0.21963394342762063, + "grad_norm": 28.5969295501709, + "learning_rate": 5e-06, + "loss": 1.621, + "num_input_tokens_seen": 4117564, + "step": 66 + }, + { + "epoch": 0.21963394342762063, + "loss": 1.391996145248413, + "loss_ce": 0.03555082529783249, + "loss_iou": 0.462890625, + "loss_num": 0.08642578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 4117564, + "step": 66 + }, + { + "epoch": 0.22296173044925124, + "grad_norm": 13.81509780883789, + "learning_rate": 5e-06, + "loss": 1.4978, + "num_input_tokens_seen": 4181136, + "step": 67 + }, + { + "epoch": 0.22296173044925124, + "loss": 1.482597827911377, + "loss_ce": 0.14080099761486053, + "loss_iou": 0.435546875, + "loss_num": 0.09375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 4181136, + "step": 67 + }, + { + "epoch": 0.22628951747088186, + "grad_norm": 22.297697067260742, + "learning_rate": 5e-06, + "loss": 1.5529, + "num_input_tokens_seen": 4244384, + "step": 68 + }, + { + "epoch": 0.22628951747088186, + "loss": 1.800987720489502, + "loss_ce": 0.0724719688296318, + "loss_iou": 0.5, + "loss_num": 0.1455078125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 4244384, + "step": 68 + }, + { + "epoch": 0.22961730449251247, + "grad_norm": 17.73756217956543, + "learning_rate": 5e-06, + "loss": 1.6361, + "num_input_tokens_seen": 4306480, + "step": 69 + }, + { + "epoch": 0.22961730449251247, + "loss": 1.7959282398223877, + "loss_ce": 0.25491267442703247, + "loss_iou": 0.427734375, + "loss_num": 0.1376953125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 4306480, + "step": 69 + }, + { + "epoch": 0.23294509151414308, + "grad_norm": 28.785377502441406, + "learning_rate": 5e-06, + "loss": 1.6575, + "num_input_tokens_seen": 4368812, + "step": 70 + }, + { + "epoch": 0.23294509151414308, + "loss": 1.6283104419708252, + "loss_ce": 0.05555645376443863, + "loss_iou": 0.4375, + "loss_num": 0.1396484375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 4368812, + "step": 70 + }, + { + "epoch": 0.23627287853577372, + "grad_norm": 15.95328140258789, + "learning_rate": 5e-06, + "loss": 1.923, + "num_input_tokens_seen": 4432212, + "step": 71 + }, + { + "epoch": 0.23627287853577372, + "loss": 2.040701389312744, + "loss_ce": 0.4274199604988098, + "loss_iou": 0.53125, + "loss_num": 0.10888671875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 4432212, + "step": 71 + }, + { + "epoch": 0.23960066555740434, + "grad_norm": 15.339340209960938, + "learning_rate": 5e-06, + "loss": 1.6562, + "num_input_tokens_seen": 4495800, + "step": 72 + }, + { + "epoch": 0.23960066555740434, + "loss": 1.5833992958068848, + "loss_ce": 0.07705167680978775, + "loss_iou": 0.3984375, + "loss_num": 0.1416015625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 4495800, + "step": 72 + }, + { + "epoch": 0.24292845257903495, + "grad_norm": 64.54541778564453, + "learning_rate": 5e-06, + "loss": 1.9285, + "num_input_tokens_seen": 4558428, + "step": 73 + }, + { + "epoch": 0.24292845257903495, + "loss": 1.8927369117736816, + "loss_ce": 0.13980722427368164, + "loss_iou": 0.5546875, + "loss_num": 0.12890625, + "loss_xval": 1.75, + "num_input_tokens_seen": 4558428, + "step": 73 + }, + { + "epoch": 0.24625623960066556, + "grad_norm": 32.903106689453125, + "learning_rate": 5e-06, + "loss": 2.1965, + "num_input_tokens_seen": 4622312, + "step": 74 + }, + { + "epoch": 0.24625623960066556, + "loss": 1.6801223754882812, + "loss_ce": 0.08515171706676483, + "loss_iou": 0.49609375, + "loss_num": 0.12109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 4622312, + "step": 74 + }, + { + "epoch": 0.24958402662229617, + "grad_norm": 18.268884658813477, + "learning_rate": 5e-06, + "loss": 1.696, + "num_input_tokens_seen": 4685228, + "step": 75 + }, + { + "epoch": 0.24958402662229617, + "loss": 2.0499777793884277, + "loss_ce": 0.2775166630744934, + "loss_iou": 0.494140625, + "loss_num": 0.1572265625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 4685228, + "step": 75 + }, + { + "epoch": 0.2529118136439268, + "grad_norm": 15.21312141418457, + "learning_rate": 5e-06, + "loss": 1.3746, + "num_input_tokens_seen": 4746040, + "step": 76 + }, + { + "epoch": 0.2529118136439268, + "loss": 1.2260797023773193, + "loss_ce": 0.06128474697470665, + "loss_iou": 0.248046875, + "loss_num": 0.1337890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 4746040, + "step": 76 + }, + { + "epoch": 0.2562396006655574, + "grad_norm": 24.16434097290039, + "learning_rate": 5e-06, + "loss": 1.6829, + "num_input_tokens_seen": 4809160, + "step": 77 + }, + { + "epoch": 0.2562396006655574, + "loss": 1.6074845790863037, + "loss_ce": 0.13531659543514252, + "loss_iou": 0.46875, + "loss_num": 0.1064453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 4809160, + "step": 77 + }, + { + "epoch": 0.259567387687188, + "grad_norm": 36.885498046875, + "learning_rate": 5e-06, + "loss": 1.6743, + "num_input_tokens_seen": 4872948, + "step": 78 + }, + { + "epoch": 0.259567387687188, + "loss": 1.5818986892700195, + "loss_ce": 0.03892991691827774, + "loss_iou": 0.4453125, + "loss_num": 0.130859375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 4872948, + "step": 78 + }, + { + "epoch": 0.2628951747088186, + "grad_norm": 12.761034965515137, + "learning_rate": 5e-06, + "loss": 1.3517, + "num_input_tokens_seen": 4934300, + "step": 79 + }, + { + "epoch": 0.2628951747088186, + "loss": 1.0315775871276855, + "loss_ce": 0.014487742446362972, + "loss_iou": 0.287109375, + "loss_num": 0.0888671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 4934300, + "step": 79 + }, + { + "epoch": 0.26622296173044924, + "grad_norm": 13.58120059967041, + "learning_rate": 5e-06, + "loss": 1.6854, + "num_input_tokens_seen": 4997784, + "step": 80 + }, + { + "epoch": 0.26622296173044924, + "loss": 1.634024739265442, + "loss_ce": 0.04027477279305458, + "loss_iou": 0.51953125, + "loss_num": 0.111328125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 4997784, + "step": 80 + }, + { + "epoch": 0.26955074875207985, + "grad_norm": 24.133459091186523, + "learning_rate": 5e-06, + "loss": 1.4397, + "num_input_tokens_seen": 5061392, + "step": 81 + }, + { + "epoch": 0.26955074875207985, + "loss": 1.667598009109497, + "loss_ce": 0.2418166995048523, + "loss_iou": 0.384765625, + "loss_num": 0.1318359375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 5061392, + "step": 81 + }, + { + "epoch": 0.27287853577371046, + "grad_norm": 15.399471282958984, + "learning_rate": 5e-06, + "loss": 1.3346, + "num_input_tokens_seen": 5124660, + "step": 82 + }, + { + "epoch": 0.27287853577371046, + "loss": 1.3418378829956055, + "loss_ce": 0.09183788299560547, + "loss_iou": 0.400390625, + "loss_num": 0.08984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 5124660, + "step": 82 + }, + { + "epoch": 0.2762063227953411, + "grad_norm": 19.83667755126953, + "learning_rate": 5e-06, + "loss": 1.6332, + "num_input_tokens_seen": 5186724, + "step": 83 + }, + { + "epoch": 0.2762063227953411, + "loss": 1.8349742889404297, + "loss_ce": 0.2241344451904297, + "loss_iou": 0.447265625, + "loss_num": 0.1435546875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 5186724, + "step": 83 + }, + { + "epoch": 0.2795341098169717, + "grad_norm": 15.304369926452637, + "learning_rate": 5e-06, + "loss": 1.4917, + "num_input_tokens_seen": 5248960, + "step": 84 + }, + { + "epoch": 0.2795341098169717, + "loss": 1.3170514106750488, + "loss_ce": 0.10513728857040405, + "loss_iou": 0.314453125, + "loss_num": 0.11669921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 5248960, + "step": 84 + }, + { + "epoch": 0.28286189683860236, + "grad_norm": 30.498498916625977, + "learning_rate": 5e-06, + "loss": 1.7189, + "num_input_tokens_seen": 5312544, + "step": 85 + }, + { + "epoch": 0.28286189683860236, + "loss": 1.7351880073547363, + "loss_ce": 0.04475831985473633, + "loss_iou": 0.5, + "loss_num": 0.1376953125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 5312544, + "step": 85 + }, + { + "epoch": 0.28618968386023297, + "grad_norm": 30.64577865600586, + "learning_rate": 5e-06, + "loss": 1.959, + "num_input_tokens_seen": 5375812, + "step": 86 + }, + { + "epoch": 0.28618968386023297, + "loss": 2.172987461090088, + "loss_ce": 0.3106827139854431, + "loss_iou": 0.578125, + "loss_num": 0.140625, + "loss_xval": 1.859375, + "num_input_tokens_seen": 5375812, + "step": 86 + }, + { + "epoch": 0.2895174708818636, + "grad_norm": 23.82610321044922, + "learning_rate": 5e-06, + "loss": 1.6742, + "num_input_tokens_seen": 5438272, + "step": 87 + }, + { + "epoch": 0.2895174708818636, + "loss": 1.6248981952667236, + "loss_ce": 0.05556230992078781, + "loss_iou": 0.39453125, + "loss_num": 0.1572265625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 5438272, + "step": 87 + }, + { + "epoch": 0.2928452579034942, + "grad_norm": 14.44840145111084, + "learning_rate": 5e-06, + "loss": 1.7714, + "num_input_tokens_seen": 5498964, + "step": 88 + }, + { + "epoch": 0.2928452579034942, + "loss": 1.6302241086959839, + "loss_ce": 0.05844667926430702, + "loss_iou": 0.359375, + "loss_num": 0.1708984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 5498964, + "step": 88 + }, + { + "epoch": 0.2961730449251248, + "grad_norm": 26.83960723876953, + "learning_rate": 5e-06, + "loss": 1.6978, + "num_input_tokens_seen": 5561644, + "step": 89 + }, + { + "epoch": 0.2961730449251248, + "loss": 1.8088901042938232, + "loss_ce": 0.06719096004962921, + "loss_iou": 0.4921875, + "loss_num": 0.15234375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 5561644, + "step": 89 + }, + { + "epoch": 0.2995008319467554, + "grad_norm": 11.925620079040527, + "learning_rate": 5e-06, + "loss": 1.6314, + "num_input_tokens_seen": 5624348, + "step": 90 + }, + { + "epoch": 0.2995008319467554, + "loss": 1.4735398292541504, + "loss_ce": 0.1751999855041504, + "loss_iou": 0.41015625, + "loss_num": 0.09619140625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 5624348, + "step": 90 + }, + { + "epoch": 0.30282861896838603, + "grad_norm": 11.06351089477539, + "learning_rate": 5e-06, + "loss": 1.2644, + "num_input_tokens_seen": 5686716, + "step": 91 + }, + { + "epoch": 0.30282861896838603, + "loss": 1.3133293390274048, + "loss_ce": 0.03647388890385628, + "loss_iou": 0.3671875, + "loss_num": 0.10888671875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 5686716, + "step": 91 + }, + { + "epoch": 0.30615640599001664, + "grad_norm": 18.150497436523438, + "learning_rate": 5e-06, + "loss": 1.4589, + "num_input_tokens_seen": 5750560, + "step": 92 + }, + { + "epoch": 0.30615640599001664, + "loss": 1.482915997505188, + "loss_ce": 0.026373039931058884, + "loss_iou": 0.494140625, + "loss_num": 0.09326171875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 5750560, + "step": 92 + }, + { + "epoch": 0.30948419301164726, + "grad_norm": 31.141006469726562, + "learning_rate": 5e-06, + "loss": 1.662, + "num_input_tokens_seen": 5813996, + "step": 93 + }, + { + "epoch": 0.30948419301164726, + "loss": 1.7749927043914795, + "loss_ce": 0.032805174589157104, + "loss_iou": 0.57421875, + "loss_num": 0.119140625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 5813996, + "step": 93 + }, + { + "epoch": 0.31281198003327787, + "grad_norm": 20.244047164916992, + "learning_rate": 5e-06, + "loss": 1.3877, + "num_input_tokens_seen": 5876956, + "step": 94 + }, + { + "epoch": 0.31281198003327787, + "loss": 1.3969483375549316, + "loss_ce": 0.058081258088350296, + "loss_iou": 0.421875, + "loss_num": 0.09912109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 5876956, + "step": 94 + }, + { + "epoch": 0.3161397670549085, + "grad_norm": 10.707723617553711, + "learning_rate": 5e-06, + "loss": 1.552, + "num_input_tokens_seen": 5939564, + "step": 95 + }, + { + "epoch": 0.3161397670549085, + "loss": 1.7702038288116455, + "loss_ce": 0.031434379518032074, + "loss_iou": 0.5390625, + "loss_num": 0.1328125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 5939564, + "step": 95 + }, + { + "epoch": 0.3194675540765391, + "grad_norm": 15.120357513427734, + "learning_rate": 5e-06, + "loss": 1.3944, + "num_input_tokens_seen": 6000348, + "step": 96 + }, + { + "epoch": 0.3194675540765391, + "loss": 1.3216090202331543, + "loss_ce": 0.1492457389831543, + "loss_iou": 0.2578125, + "loss_num": 0.130859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 6000348, + "step": 96 + }, + { + "epoch": 0.3227953410981697, + "grad_norm": 27.774568557739258, + "learning_rate": 5e-06, + "loss": 1.6287, + "num_input_tokens_seen": 6063144, + "step": 97 + }, + { + "epoch": 0.3227953410981697, + "loss": 1.5362926721572876, + "loss_ce": 0.0611950121819973, + "loss_iou": 0.400390625, + "loss_num": 0.134765625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 6063144, + "step": 97 + }, + { + "epoch": 0.3261231281198003, + "grad_norm": 23.571321487426758, + "learning_rate": 5e-06, + "loss": 1.501, + "num_input_tokens_seen": 6125216, + "step": 98 + }, + { + "epoch": 0.3261231281198003, + "loss": 1.4960654973983765, + "loss_ce": 0.02975688874721527, + "loss_iou": 0.400390625, + "loss_num": 0.1328125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 6125216, + "step": 98 + }, + { + "epoch": 0.32945091514143093, + "grad_norm": 21.455690383911133, + "learning_rate": 5e-06, + "loss": 1.2035, + "num_input_tokens_seen": 6186436, + "step": 99 + }, + { + "epoch": 0.32945091514143093, + "loss": 1.533522367477417, + "loss_ce": 0.024733252823352814, + "loss_iou": 0.482421875, + "loss_num": 0.10888671875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 6186436, + "step": 99 + }, + { + "epoch": 0.33277870216306155, + "grad_norm": 19.567928314208984, + "learning_rate": 5e-06, + "loss": 1.6169, + "num_input_tokens_seen": 6248292, + "step": 100 + }, + { + "epoch": 0.33277870216306155, + "loss": 1.5605666637420654, + "loss_ce": 0.15138691663742065, + "loss_iou": 0.31640625, + "loss_num": 0.1552734375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 6248292, + "step": 100 + }, + { + "epoch": 0.33610648918469216, + "grad_norm": 132.5515899658203, + "learning_rate": 5e-06, + "loss": 1.564, + "num_input_tokens_seen": 6312152, + "step": 101 + }, + { + "epoch": 0.33610648918469216, + "loss": 1.7496979236602783, + "loss_ce": 0.02167057991027832, + "loss_iou": 0.484375, + "loss_num": 0.1513671875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 6312152, + "step": 101 + }, + { + "epoch": 0.33943427620632277, + "grad_norm": 20.127302169799805, + "learning_rate": 5e-06, + "loss": 1.6141, + "num_input_tokens_seen": 6374832, + "step": 102 + }, + { + "epoch": 0.33943427620632277, + "loss": 1.5366733074188232, + "loss_ce": 0.15434911847114563, + "loss_iou": 0.4453125, + "loss_num": 0.09912109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 6374832, + "step": 102 + }, + { + "epoch": 0.3427620632279534, + "grad_norm": 26.46497917175293, + "learning_rate": 5e-06, + "loss": 1.7083, + "num_input_tokens_seen": 6439248, + "step": 103 + }, + { + "epoch": 0.3427620632279534, + "loss": 1.5998561382293701, + "loss_ce": 0.11841069906949997, + "loss_iou": 0.447265625, + "loss_num": 0.11767578125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 6439248, + "step": 103 + }, + { + "epoch": 0.34608985024958405, + "grad_norm": 22.090496063232422, + "learning_rate": 5e-06, + "loss": 1.5894, + "num_input_tokens_seen": 6501596, + "step": 104 + }, + { + "epoch": 0.34608985024958405, + "loss": 1.4435570240020752, + "loss_ce": 0.05586168169975281, + "loss_iou": 0.4453125, + "loss_num": 0.09912109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 6501596, + "step": 104 + }, + { + "epoch": 0.34941763727121466, + "grad_norm": 14.779273986816406, + "learning_rate": 5e-06, + "loss": 1.5203, + "num_input_tokens_seen": 6562808, + "step": 105 + }, + { + "epoch": 0.34941763727121466, + "loss": 1.5112072229385376, + "loss_ce": 0.05271116644144058, + "loss_iou": 0.376953125, + "loss_num": 0.1416015625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 6562808, + "step": 105 + }, + { + "epoch": 0.3527454242928453, + "grad_norm": 21.532625198364258, + "learning_rate": 5e-06, + "loss": 1.6474, + "num_input_tokens_seen": 6626268, + "step": 106 + }, + { + "epoch": 0.3527454242928453, + "loss": 1.6102540493011475, + "loss_ce": 0.03603534772992134, + "loss_iou": 0.5, + "loss_num": 0.115234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 6626268, + "step": 106 + }, + { + "epoch": 0.3560732113144759, + "grad_norm": 20.450246810913086, + "learning_rate": 5e-06, + "loss": 1.6417, + "num_input_tokens_seen": 6688448, + "step": 107 + }, + { + "epoch": 0.3560732113144759, + "loss": 1.5604519844055176, + "loss_ce": 0.10928011685609818, + "loss_iou": 0.44921875, + "loss_num": 0.11083984375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 6688448, + "step": 107 + }, + { + "epoch": 0.3594009983361065, + "grad_norm": 11.704852104187012, + "learning_rate": 5e-06, + "loss": 1.4292, + "num_input_tokens_seen": 6751076, + "step": 108 + }, + { + "epoch": 0.3594009983361065, + "loss": 1.302997350692749, + "loss_ce": 0.06276305764913559, + "loss_iou": 0.375, + "loss_num": 0.0986328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 6751076, + "step": 108 + }, + { + "epoch": 0.3627287853577371, + "grad_norm": 26.655656814575195, + "learning_rate": 5e-06, + "loss": 1.5021, + "num_input_tokens_seen": 6814604, + "step": 109 + }, + { + "epoch": 0.3627287853577371, + "loss": 1.4590511322021484, + "loss_ce": 0.03766445070505142, + "loss_iou": 0.39453125, + "loss_num": 0.126953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 6814604, + "step": 109 + }, + { + "epoch": 0.36605657237936773, + "grad_norm": 32.32645797729492, + "learning_rate": 5e-06, + "loss": 1.293, + "num_input_tokens_seen": 6877660, + "step": 110 + }, + { + "epoch": 0.36605657237936773, + "loss": 1.2947750091552734, + "loss_ce": 0.05161091312766075, + "loss_iou": 0.365234375, + "loss_num": 0.1025390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 6877660, + "step": 110 + }, + { + "epoch": 0.36938435940099834, + "grad_norm": 19.443389892578125, + "learning_rate": 5e-06, + "loss": 1.4704, + "num_input_tokens_seen": 6940376, + "step": 111 + }, + { + "epoch": 0.36938435940099834, + "loss": 1.6598196029663086, + "loss_ce": 0.016264785081148148, + "loss_iou": 0.439453125, + "loss_num": 0.15234375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 6940376, + "step": 111 + }, + { + "epoch": 0.37271214642262895, + "grad_norm": 17.155519485473633, + "learning_rate": 5e-06, + "loss": 1.468, + "num_input_tokens_seen": 7002968, + "step": 112 + }, + { + "epoch": 0.37271214642262895, + "loss": 1.4055347442626953, + "loss_ce": 0.047136228531599045, + "loss_iou": 0.32421875, + "loss_num": 0.1416015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 7002968, + "step": 112 + }, + { + "epoch": 0.37603993344425957, + "grad_norm": 233.18862915039062, + "learning_rate": 5e-06, + "loss": 1.7817, + "num_input_tokens_seen": 7067164, + "step": 113 + }, + { + "epoch": 0.37603993344425957, + "loss": 1.9062466621398926, + "loss_ce": 0.01269207801669836, + "loss_iou": 0.58984375, + "loss_num": 0.142578125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 7067164, + "step": 113 + }, + { + "epoch": 0.3793677204658902, + "grad_norm": 35.83794403076172, + "learning_rate": 5e-06, + "loss": 1.7392, + "num_input_tokens_seen": 7130484, + "step": 114 + }, + { + "epoch": 0.3793677204658902, + "loss": 1.760888934135437, + "loss_ce": 0.05971705913543701, + "loss_iou": 0.494140625, + "loss_num": 0.1416015625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 7130484, + "step": 114 + }, + { + "epoch": 0.3826955074875208, + "grad_norm": 17.227394104003906, + "learning_rate": 5e-06, + "loss": 1.4545, + "num_input_tokens_seen": 7192824, + "step": 115 + }, + { + "epoch": 0.3826955074875208, + "loss": 1.311098337173462, + "loss_ce": 0.025453729555010796, + "loss_iou": 0.380859375, + "loss_num": 0.10498046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 7192824, + "step": 115 + }, + { + "epoch": 0.3860232945091514, + "grad_norm": 20.637577056884766, + "learning_rate": 5e-06, + "loss": 1.4128, + "num_input_tokens_seen": 7254948, + "step": 116 + }, + { + "epoch": 0.3860232945091514, + "loss": 1.4620569944381714, + "loss_ce": 0.043355897068977356, + "loss_iou": 0.376953125, + "loss_num": 0.1328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 7254948, + "step": 116 + }, + { + "epoch": 0.389351081530782, + "grad_norm": 32.69279479980469, + "learning_rate": 5e-06, + "loss": 1.7562, + "num_input_tokens_seen": 7318796, + "step": 117 + }, + { + "epoch": 0.389351081530782, + "loss": 1.807417392730713, + "loss_ce": 0.037886105477809906, + "loss_iou": 0.494140625, + "loss_num": 0.15625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 7318796, + "step": 117 + }, + { + "epoch": 0.39267886855241263, + "grad_norm": 11.888574600219727, + "learning_rate": 5e-06, + "loss": 1.3916, + "num_input_tokens_seen": 7379464, + "step": 118 + }, + { + "epoch": 0.39267886855241263, + "loss": 1.490173101425171, + "loss_ce": 0.04559309780597687, + "loss_iou": 0.4140625, + "loss_num": 0.12353515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 7379464, + "step": 118 + }, + { + "epoch": 0.39600665557404324, + "grad_norm": 21.6405029296875, + "learning_rate": 5e-06, + "loss": 1.2959, + "num_input_tokens_seen": 7441932, + "step": 119 + }, + { + "epoch": 0.39600665557404324, + "loss": 1.2409827709197998, + "loss_ce": 0.017350006848573685, + "loss_iou": 0.302734375, + "loss_num": 0.12353515625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 7441932, + "step": 119 + }, + { + "epoch": 0.39933444259567386, + "grad_norm": 196.78163146972656, + "learning_rate": 5e-06, + "loss": 1.7212, + "num_input_tokens_seen": 7505232, + "step": 120 + }, + { + "epoch": 0.39933444259567386, + "loss": 1.5443594455718994, + "loss_ce": 0.028734492138028145, + "loss_iou": 0.48828125, + "loss_num": 0.107421875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 7505232, + "step": 120 + }, + { + "epoch": 0.40266222961730447, + "grad_norm": 118.65660858154297, + "learning_rate": 5e-06, + "loss": 1.3824, + "num_input_tokens_seen": 7568056, + "step": 121 + }, + { + "epoch": 0.40266222961730447, + "loss": 1.408996343612671, + "loss_ce": 0.02081269398331642, + "loss_iou": 0.365234375, + "loss_num": 0.130859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 7568056, + "step": 121 + }, + { + "epoch": 0.4059900166389351, + "grad_norm": 19.64975929260254, + "learning_rate": 5e-06, + "loss": 1.5991, + "num_input_tokens_seen": 7632352, + "step": 122 + }, + { + "epoch": 0.4059900166389351, + "loss": 1.42240571975708, + "loss_ce": 0.056683123111724854, + "loss_iou": 0.46484375, + "loss_num": 0.0869140625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 7632352, + "step": 122 + }, + { + "epoch": 0.40931780366056575, + "grad_norm": 47.080657958984375, + "learning_rate": 5e-06, + "loss": 1.4264, + "num_input_tokens_seen": 7695112, + "step": 123 + }, + { + "epoch": 0.40931780366056575, + "loss": 1.4007999897003174, + "loss_ce": 0.023846877738833427, + "loss_iou": 0.42578125, + "loss_num": 0.10546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 7695112, + "step": 123 + }, + { + "epoch": 0.41264559068219636, + "grad_norm": 16.035123825073242, + "learning_rate": 5e-06, + "loss": 1.5467, + "num_input_tokens_seen": 7758548, + "step": 124 + }, + { + "epoch": 0.41264559068219636, + "loss": 1.532633900642395, + "loss_ce": 0.059977658092975616, + "loss_iou": 0.447265625, + "loss_num": 0.115234375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 7758548, + "step": 124 + }, + { + "epoch": 0.415973377703827, + "grad_norm": 15.68152904510498, + "learning_rate": 5e-06, + "loss": 1.406, + "num_input_tokens_seen": 7818644, + "step": 125 + }, + { + "epoch": 0.415973377703827, + "loss": 1.4248697757720947, + "loss_ce": 0.009464464150369167, + "loss_iou": 0.388671875, + "loss_num": 0.1279296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 7818644, + "step": 125 + }, + { + "epoch": 0.4193011647254576, + "grad_norm": 23.207124710083008, + "learning_rate": 5e-06, + "loss": 1.5815, + "num_input_tokens_seen": 7881772, + "step": 126 + }, + { + "epoch": 0.4193011647254576, + "loss": 1.6510601043701172, + "loss_ce": 0.010435021482408047, + "loss_iou": 0.458984375, + "loss_num": 0.14453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 7881772, + "step": 126 + }, + { + "epoch": 0.4226289517470882, + "grad_norm": 17.7788028717041, + "learning_rate": 5e-06, + "loss": 1.3724, + "num_input_tokens_seen": 7943924, + "step": 127 + }, + { + "epoch": 0.4226289517470882, + "loss": 1.4064964056015015, + "loss_ce": 0.027834344655275345, + "loss_iou": 0.359375, + "loss_num": 0.1318359375, + "loss_xval": 1.375, + "num_input_tokens_seen": 7943924, + "step": 127 + }, + { + "epoch": 0.4259567387687188, + "grad_norm": 22.62128448486328, + "learning_rate": 5e-06, + "loss": 1.6814, + "num_input_tokens_seen": 8007756, + "step": 128 + }, + { + "epoch": 0.4259567387687188, + "loss": 1.7109909057617188, + "loss_ce": 0.05376438423991203, + "loss_iou": 0.46484375, + "loss_num": 0.1455078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 8007756, + "step": 128 + }, + { + "epoch": 0.4292845257903494, + "grad_norm": 17.13036346435547, + "learning_rate": 5e-06, + "loss": 1.5, + "num_input_tokens_seen": 8071624, + "step": 129 + }, + { + "epoch": 0.4292845257903494, + "loss": 1.517619252204895, + "loss_ce": 0.04252162203192711, + "loss_iou": 0.46875, + "loss_num": 0.10693359375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 8071624, + "step": 129 + }, + { + "epoch": 0.43261231281198004, + "grad_norm": 28.38715934753418, + "learning_rate": 5e-06, + "loss": 1.6712, + "num_input_tokens_seen": 8135204, + "step": 130 + }, + { + "epoch": 0.43261231281198004, + "loss": 1.6535531282424927, + "loss_ce": 0.016834398731589317, + "loss_iou": 0.51171875, + "loss_num": 0.123046875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 8135204, + "step": 130 + }, + { + "epoch": 0.43594009983361065, + "grad_norm": 21.765769958496094, + "learning_rate": 5e-06, + "loss": 1.2592, + "num_input_tokens_seen": 8198328, + "step": 131 + }, + { + "epoch": 0.43594009983361065, + "loss": 1.0689215660095215, + "loss_ce": 0.04450753331184387, + "loss_iou": 0.30859375, + "loss_num": 0.08203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 8198328, + "step": 131 + }, + { + "epoch": 0.43926788685524126, + "grad_norm": 12.410709381103516, + "learning_rate": 5e-06, + "loss": 1.2945, + "num_input_tokens_seen": 8260932, + "step": 132 + }, + { + "epoch": 0.43926788685524126, + "loss": 1.2807693481445312, + "loss_ce": 0.014656049199402332, + "loss_iou": 0.32421875, + "loss_num": 0.12353515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 8260932, + "step": 132 + }, + { + "epoch": 0.4425956738768719, + "grad_norm": 24.125675201416016, + "learning_rate": 5e-06, + "loss": 1.4355, + "num_input_tokens_seen": 8324024, + "step": 133 + }, + { + "epoch": 0.4425956738768719, + "loss": 1.2214634418487549, + "loss_ce": 0.035916537046432495, + "loss_iou": 0.28125, + "loss_num": 0.12451171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 8324024, + "step": 133 + }, + { + "epoch": 0.4459234608985025, + "grad_norm": 249.4659423828125, + "learning_rate": 5e-06, + "loss": 1.2783, + "num_input_tokens_seen": 8386732, + "step": 134 + }, + { + "epoch": 0.4459234608985025, + "loss": 1.165820837020874, + "loss_ce": 0.022754406556487083, + "loss_iou": 0.34765625, + "loss_num": 0.08935546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 8386732, + "step": 134 + }, + { + "epoch": 0.4492512479201331, + "grad_norm": 15.685258865356445, + "learning_rate": 5e-06, + "loss": 1.6974, + "num_input_tokens_seen": 8449704, + "step": 135 + }, + { + "epoch": 0.4492512479201331, + "loss": 1.7366528511047363, + "loss_ce": 0.01448477990925312, + "loss_iou": 0.515625, + "loss_num": 0.1376953125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 8449704, + "step": 135 + }, + { + "epoch": 0.4525790349417637, + "grad_norm": 15.560267448425293, + "learning_rate": 5e-06, + "loss": 1.3367, + "num_input_tokens_seen": 8511980, + "step": 136 + }, + { + "epoch": 0.4525790349417637, + "loss": 1.5319541692733765, + "loss_ce": 0.06418071687221527, + "loss_iou": 0.50390625, + "loss_num": 0.091796875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 8511980, + "step": 136 + }, + { + "epoch": 0.4559068219633943, + "grad_norm": 31.106922149658203, + "learning_rate": 5e-06, + "loss": 1.3191, + "num_input_tokens_seen": 8575108, + "step": 137 + }, + { + "epoch": 0.4559068219633943, + "loss": 1.1032752990722656, + "loss_ce": 0.01050189882516861, + "loss_iou": 0.2734375, + "loss_num": 0.10888671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 8575108, + "step": 137 + }, + { + "epoch": 0.45923460898502494, + "grad_norm": 19.03159523010254, + "learning_rate": 5e-06, + "loss": 1.51, + "num_input_tokens_seen": 8637260, + "step": 138 + }, + { + "epoch": 0.45923460898502494, + "loss": 1.4713654518127441, + "loss_ce": 0.05046709254384041, + "loss_iou": 0.416015625, + "loss_num": 0.1171875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 8637260, + "step": 138 + }, + { + "epoch": 0.46256239600665555, + "grad_norm": 18.39104652404785, + "learning_rate": 5e-06, + "loss": 1.5178, + "num_input_tokens_seen": 8699912, + "step": 139 + }, + { + "epoch": 0.46256239600665555, + "loss": 1.5592514276504517, + "loss_ce": 0.04411466047167778, + "loss_iou": 0.49609375, + "loss_num": 0.1044921875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 8699912, + "step": 139 + }, + { + "epoch": 0.46589018302828616, + "grad_norm": 16.95204734802246, + "learning_rate": 5e-06, + "loss": 1.4614, + "num_input_tokens_seen": 8763456, + "step": 140 + }, + { + "epoch": 0.46589018302828616, + "loss": 1.5307507514953613, + "loss_ce": 0.026844505220651627, + "loss_iou": 0.419921875, + "loss_num": 0.1328125, + "loss_xval": 1.5, + "num_input_tokens_seen": 8763456, + "step": 140 + }, + { + "epoch": 0.46921797004991683, + "grad_norm": 13.439885139465332, + "learning_rate": 5e-06, + "loss": 1.2669, + "num_input_tokens_seen": 8823008, + "step": 141 + }, + { + "epoch": 0.46921797004991683, + "loss": 1.1225981712341309, + "loss_ce": 0.03861372545361519, + "loss_iou": 0.2294921875, + "loss_num": 0.125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 8823008, + "step": 141 + }, + { + "epoch": 0.47254575707154745, + "grad_norm": 13.823068618774414, + "learning_rate": 5e-06, + "loss": 1.3294, + "num_input_tokens_seen": 8885756, + "step": 142 + }, + { + "epoch": 0.47254575707154745, + "loss": 1.3653852939605713, + "loss_ce": 0.011625496670603752, + "loss_iou": 0.3671875, + "loss_num": 0.1240234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 8885756, + "step": 142 + }, + { + "epoch": 0.47587354409317806, + "grad_norm": 46.61298751831055, + "learning_rate": 5e-06, + "loss": 1.6217, + "num_input_tokens_seen": 8949676, + "step": 143 + }, + { + "epoch": 0.47587354409317806, + "loss": 1.6690086126327515, + "loss_ce": 0.08209459483623505, + "loss_iou": 0.4453125, + "loss_num": 0.1396484375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 8949676, + "step": 143 + }, + { + "epoch": 0.47920133111480867, + "grad_norm": 12.167203903198242, + "learning_rate": 5e-06, + "loss": 1.1975, + "num_input_tokens_seen": 9012212, + "step": 144 + }, + { + "epoch": 0.47920133111480867, + "loss": 1.0818878412246704, + "loss_ce": 0.06528623402118683, + "loss_iou": 0.302734375, + "loss_num": 0.08251953125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 9012212, + "step": 144 + }, + { + "epoch": 0.4825291181364393, + "grad_norm": 19.003299713134766, + "learning_rate": 5e-06, + "loss": 1.3821, + "num_input_tokens_seen": 9075056, + "step": 145 + }, + { + "epoch": 0.4825291181364393, + "loss": 1.3460979461669922, + "loss_ce": 0.01919359713792801, + "loss_iou": 0.404296875, + "loss_num": 0.103515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 9075056, + "step": 145 + }, + { + "epoch": 0.4858569051580699, + "grad_norm": 30.158510208129883, + "learning_rate": 5e-06, + "loss": 1.3432, + "num_input_tokens_seen": 9138200, + "step": 146 + }, + { + "epoch": 0.4858569051580699, + "loss": 1.3490657806396484, + "loss_ce": 0.0026301806792616844, + "loss_iou": 0.4453125, + "loss_num": 0.09130859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 9138200, + "step": 146 + }, + { + "epoch": 0.4891846921797005, + "grad_norm": 13.28848648071289, + "learning_rate": 5e-06, + "loss": 1.2648, + "num_input_tokens_seen": 9201716, + "step": 147 + }, + { + "epoch": 0.4891846921797005, + "loss": 1.31808340549469, + "loss_ce": 0.007536512799561024, + "loss_iou": 0.400390625, + "loss_num": 0.1015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 9201716, + "step": 147 + }, + { + "epoch": 0.4925124792013311, + "grad_norm": 22.02879524230957, + "learning_rate": 5e-06, + "loss": 1.2787, + "num_input_tokens_seen": 9264876, + "step": 148 + }, + { + "epoch": 0.4925124792013311, + "loss": 1.3372578620910645, + "loss_ce": 0.014503922313451767, + "loss_iou": 0.388671875, + "loss_num": 0.10888671875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 9264876, + "step": 148 + }, + { + "epoch": 0.49584026622296173, + "grad_norm": 15.431622505187988, + "learning_rate": 5e-06, + "loss": 1.4477, + "num_input_tokens_seen": 9327456, + "step": 149 + }, + { + "epoch": 0.49584026622296173, + "loss": 1.287103295326233, + "loss_ce": 0.03173219412565231, + "loss_iou": 0.38671875, + "loss_num": 0.09619140625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 9327456, + "step": 149 + }, + { + "epoch": 0.49916805324459235, + "grad_norm": 14.655500411987305, + "learning_rate": 5e-06, + "loss": 1.5951, + "num_input_tokens_seen": 9391828, + "step": 150 + }, + { + "epoch": 0.49916805324459235, + "loss": 1.5570836067199707, + "loss_ce": 0.03169288486242294, + "loss_iou": 0.46875, + "loss_num": 0.11767578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 9391828, + "step": 150 + }, + { + "epoch": 0.502495840266223, + "grad_norm": 12.9921236038208, + "learning_rate": 5e-06, + "loss": 1.1374, + "num_input_tokens_seen": 9453992, + "step": 151 + }, + { + "epoch": 0.502495840266223, + "loss": 1.1639986038208008, + "loss_ce": 0.00481888884678483, + "loss_iou": 0.2578125, + "loss_num": 0.12890625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 9453992, + "step": 151 + }, + { + "epoch": 0.5058236272878536, + "grad_norm": 12.746012687683105, + "learning_rate": 5e-06, + "loss": 1.4932, + "num_input_tokens_seen": 9515028, + "step": 152 + }, + { + "epoch": 0.5058236272878536, + "loss": 1.3379508256912231, + "loss_ce": 0.004943011794239283, + "loss_iou": 0.43359375, + "loss_num": 0.0927734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 9515028, + "step": 152 + }, + { + "epoch": 0.5091514143094842, + "grad_norm": 12.805912017822266, + "learning_rate": 5e-06, + "loss": 1.3026, + "num_input_tokens_seen": 9576904, + "step": 153 + }, + { + "epoch": 0.5091514143094842, + "loss": 1.1774930953979492, + "loss_ce": 0.010500917211174965, + "loss_iou": 0.294921875, + "loss_num": 0.1162109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 9576904, + "step": 153 + }, + { + "epoch": 0.5124792013311148, + "grad_norm": 11.221924781799316, + "learning_rate": 5e-06, + "loss": 1.4352, + "num_input_tokens_seen": 9640896, + "step": 154 + }, + { + "epoch": 0.5124792013311148, + "loss": 1.4379901885986328, + "loss_ce": 0.014650269411504269, + "loss_iou": 0.474609375, + "loss_num": 0.09521484375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 9640896, + "step": 154 + }, + { + "epoch": 0.5158069883527454, + "grad_norm": 29.179161071777344, + "learning_rate": 5e-06, + "loss": 1.4006, + "num_input_tokens_seen": 9704416, + "step": 155 + }, + { + "epoch": 0.5158069883527454, + "loss": 1.2948763370513916, + "loss_ce": 0.02729811705648899, + "loss_iou": 0.361328125, + "loss_num": 0.109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 9704416, + "step": 155 + }, + { + "epoch": 0.519134775374376, + "grad_norm": 30.093252182006836, + "learning_rate": 5e-06, + "loss": 1.5561, + "num_input_tokens_seen": 9767272, + "step": 156 + }, + { + "epoch": 0.519134775374376, + "loss": 1.4946575164794922, + "loss_ce": 0.012723930180072784, + "loss_iou": 0.46875, + "loss_num": 0.1083984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 9767272, + "step": 156 + }, + { + "epoch": 0.5224625623960066, + "grad_norm": 14.34788990020752, + "learning_rate": 5e-06, + "loss": 1.4241, + "num_input_tokens_seen": 9829412, + "step": 157 + }, + { + "epoch": 0.5224625623960066, + "loss": 1.4533286094665527, + "loss_ce": 0.002156792674213648, + "loss_iou": 0.451171875, + "loss_num": 0.109375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 9829412, + "step": 157 + }, + { + "epoch": 0.5257903494176372, + "grad_norm": 45.826786041259766, + "learning_rate": 5e-06, + "loss": 1.1328, + "num_input_tokens_seen": 9891184, + "step": 158 + }, + { + "epoch": 0.5257903494176372, + "loss": 1.2606561183929443, + "loss_ce": 0.008825048804283142, + "loss_iou": 0.4375, + "loss_num": 0.07568359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 9891184, + "step": 158 + }, + { + "epoch": 0.5291181364392679, + "grad_norm": 14.152484893798828, + "learning_rate": 5e-06, + "loss": 1.2858, + "num_input_tokens_seen": 9952284, + "step": 159 + }, + { + "epoch": 0.5291181364392679, + "loss": 1.2074310779571533, + "loss_ce": 0.024569693952798843, + "loss_iou": 0.330078125, + "loss_num": 0.1044921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 9952284, + "step": 159 + }, + { + "epoch": 0.5324459234608985, + "grad_norm": 37.07005310058594, + "learning_rate": 5e-06, + "loss": 1.5076, + "num_input_tokens_seen": 10014964, + "step": 160 + }, + { + "epoch": 0.5324459234608985, + "loss": 1.5249152183532715, + "loss_ce": 0.008313634432852268, + "loss_iou": 0.43359375, + "loss_num": 0.1298828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 10014964, + "step": 160 + }, + { + "epoch": 0.5357737104825291, + "grad_norm": 17.514577865600586, + "learning_rate": 5e-06, + "loss": 1.2458, + "num_input_tokens_seen": 10078884, + "step": 161 + }, + { + "epoch": 0.5357737104825291, + "loss": 0.9992653727531433, + "loss_ce": 0.006589556112885475, + "loss_iou": 0.27734375, + "loss_num": 0.08740234375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 10078884, + "step": 161 + }, + { + "epoch": 0.5391014975041597, + "grad_norm": 19.246084213256836, + "learning_rate": 5e-06, + "loss": 1.3381, + "num_input_tokens_seen": 10141460, + "step": 162 + }, + { + "epoch": 0.5391014975041597, + "loss": 1.0187444686889648, + "loss_ce": 0.012152590788900852, + "loss_iou": 0.24609375, + "loss_num": 0.10302734375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 10141460, + "step": 162 + }, + { + "epoch": 0.5424292845257903, + "grad_norm": 14.021342277526855, + "learning_rate": 5e-06, + "loss": 1.2232, + "num_input_tokens_seen": 10203772, + "step": 163 + }, + { + "epoch": 0.5424292845257903, + "loss": 1.3046138286590576, + "loss_ce": 0.004809187725186348, + "loss_iou": 0.40234375, + "loss_num": 0.0986328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 10203772, + "step": 163 + }, + { + "epoch": 0.5457570715474209, + "grad_norm": 20.620309829711914, + "learning_rate": 5e-06, + "loss": 1.3921, + "num_input_tokens_seen": 10266640, + "step": 164 + }, + { + "epoch": 0.5457570715474209, + "loss": 1.3793249130249023, + "loss_ce": 0.012625731527805328, + "loss_iou": 0.478515625, + "loss_num": 0.08154296875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 10266640, + "step": 164 + }, + { + "epoch": 0.5490848585690515, + "grad_norm": 27.303794860839844, + "learning_rate": 5e-06, + "loss": 1.3677, + "num_input_tokens_seen": 10330220, + "step": 165 + }, + { + "epoch": 0.5490848585690515, + "loss": 1.2398979663848877, + "loss_ce": 0.002593299839645624, + "loss_iou": 0.416015625, + "loss_num": 0.0810546875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 10330220, + "step": 165 + }, + { + "epoch": 0.5524126455906821, + "grad_norm": 15.321734428405762, + "learning_rate": 5e-06, + "loss": 1.3985, + "num_input_tokens_seen": 10392312, + "step": 166 + }, + { + "epoch": 0.5524126455906821, + "loss": 1.1850658655166626, + "loss_ce": 0.030891068279743195, + "loss_iou": 0.279296875, + "loss_num": 0.119140625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 10392312, + "step": 166 + }, + { + "epoch": 0.5557404326123128, + "grad_norm": 48.359825134277344, + "learning_rate": 5e-06, + "loss": 1.1362, + "num_input_tokens_seen": 10455024, + "step": 167 + }, + { + "epoch": 0.5557404326123128, + "loss": 1.3550812005996704, + "loss_ce": 0.0035187224857509136, + "loss_iou": 0.419921875, + "loss_num": 0.1025390625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 10455024, + "step": 167 + }, + { + "epoch": 0.5590682196339434, + "grad_norm": 31.070018768310547, + "learning_rate": 5e-06, + "loss": 1.0637, + "num_input_tokens_seen": 10516804, + "step": 168 + }, + { + "epoch": 0.5590682196339434, + "loss": 0.8580918312072754, + "loss_ce": 0.006285225972533226, + "loss_iou": 0.1767578125, + "loss_num": 0.099609375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 10516804, + "step": 168 + }, + { + "epoch": 0.562396006655574, + "grad_norm": 13.717340469360352, + "learning_rate": 5e-06, + "loss": 1.3105, + "num_input_tokens_seen": 10579816, + "step": 169 + }, + { + "epoch": 0.562396006655574, + "loss": 1.291137933731079, + "loss_ce": 0.017700420692563057, + "loss_iou": 0.40625, + "loss_num": 0.09228515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 10579816, + "step": 169 + }, + { + "epoch": 0.5657237936772047, + "grad_norm": 14.410500526428223, + "learning_rate": 5e-06, + "loss": 1.1444, + "num_input_tokens_seen": 10640804, + "step": 170 + }, + { + "epoch": 0.5657237936772047, + "loss": 1.0607528686523438, + "loss_ce": 0.012901253998279572, + "loss_iou": 0.2578125, + "loss_num": 0.1064453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 10640804, + "step": 170 + }, + { + "epoch": 0.5690515806988353, + "grad_norm": 21.776578903198242, + "learning_rate": 5e-06, + "loss": 1.5343, + "num_input_tokens_seen": 10705984, + "step": 171 + }, + { + "epoch": 0.5690515806988353, + "loss": 1.5333166122436523, + "loss_ce": 0.013785396702587605, + "loss_iou": 0.54296875, + "loss_num": 0.0869140625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 10705984, + "step": 171 + }, + { + "epoch": 0.5723793677204659, + "grad_norm": 20.108116149902344, + "learning_rate": 5e-06, + "loss": 1.4882, + "num_input_tokens_seen": 10768956, + "step": 172 + }, + { + "epoch": 0.5723793677204659, + "loss": 1.473107099533081, + "loss_ce": 0.015587646514177322, + "loss_iou": 0.482421875, + "loss_num": 0.0986328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 10768956, + "step": 172 + }, + { + "epoch": 0.5757071547420965, + "grad_norm": 60.56779479980469, + "learning_rate": 5e-06, + "loss": 1.6353, + "num_input_tokens_seen": 10832704, + "step": 173 + }, + { + "epoch": 0.5757071547420965, + "loss": 1.7748465538024902, + "loss_ce": 0.0072684986516833305, + "loss_iou": 0.5234375, + "loss_num": 0.14453125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 10832704, + "step": 173 + }, + { + "epoch": 0.5790349417637272, + "grad_norm": 19.885292053222656, + "learning_rate": 5e-06, + "loss": 1.3888, + "num_input_tokens_seen": 10895876, + "step": 174 + }, + { + "epoch": 0.5790349417637272, + "loss": 1.2018749713897705, + "loss_ce": 0.0016796982381492853, + "loss_iou": 0.365234375, + "loss_num": 0.09423828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 10895876, + "step": 174 + }, + { + "epoch": 0.5823627287853578, + "grad_norm": 32.432376861572266, + "learning_rate": 5e-06, + "loss": 1.2947, + "num_input_tokens_seen": 10957804, + "step": 175 + }, + { + "epoch": 0.5823627287853578, + "loss": 1.3988152742385864, + "loss_ce": 0.003551559057086706, + "loss_iou": 0.42578125, + "loss_num": 0.1083984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 10957804, + "step": 175 + }, + { + "epoch": 0.5856905158069884, + "grad_norm": 81.50908660888672, + "learning_rate": 5e-06, + "loss": 1.1717, + "num_input_tokens_seen": 11017420, + "step": 176 + }, + { + "epoch": 0.5856905158069884, + "loss": 1.09592866897583, + "loss_ce": 0.021709948778152466, + "loss_iou": 0.189453125, + "loss_num": 0.138671875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 11017420, + "step": 176 + }, + { + "epoch": 0.589018302828619, + "grad_norm": 14.417983055114746, + "learning_rate": 5e-06, + "loss": 1.5833, + "num_input_tokens_seen": 11080940, + "step": 177 + }, + { + "epoch": 0.589018302828619, + "loss": 1.7335617542266846, + "loss_ce": 0.020182903856039047, + "loss_iou": 0.5234375, + "loss_num": 0.1337890625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 11080940, + "step": 177 + }, + { + "epoch": 0.5923460898502496, + "grad_norm": 67.00255584716797, + "learning_rate": 5e-06, + "loss": 1.3181, + "num_input_tokens_seen": 11144980, + "step": 178 + }, + { + "epoch": 0.5923460898502496, + "loss": 1.448054313659668, + "loss_ce": 0.03496834263205528, + "loss_iou": 0.462890625, + "loss_num": 0.09716796875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 11144980, + "step": 178 + }, + { + "epoch": 0.5956738768718802, + "grad_norm": 17.97549057006836, + "learning_rate": 5e-06, + "loss": 1.3473, + "num_input_tokens_seen": 11208712, + "step": 179 + }, + { + "epoch": 0.5956738768718802, + "loss": 1.2413945198059082, + "loss_ce": 0.0031132774893194437, + "loss_iou": 0.408203125, + "loss_num": 0.08447265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 11208712, + "step": 179 + }, + { + "epoch": 0.5990016638935108, + "grad_norm": 23.58061408996582, + "learning_rate": 5e-06, + "loss": 1.3682, + "num_input_tokens_seen": 11271028, + "step": 180 + }, + { + "epoch": 0.5990016638935108, + "loss": 1.1757802963256836, + "loss_ce": 0.0268545038998127, + "loss_iou": 0.330078125, + "loss_num": 0.09814453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 11271028, + "step": 180 + }, + { + "epoch": 0.6023294509151415, + "grad_norm": 15.865267753601074, + "learning_rate": 5e-06, + "loss": 1.1611, + "num_input_tokens_seen": 11332560, + "step": 181 + }, + { + "epoch": 0.6023294509151415, + "loss": 1.332716941833496, + "loss_ce": 0.01826382614672184, + "loss_iou": 0.376953125, + "loss_num": 0.1123046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 11332560, + "step": 181 + }, + { + "epoch": 0.6056572379367721, + "grad_norm": 14.636528968811035, + "learning_rate": 5e-06, + "loss": 1.4119, + "num_input_tokens_seen": 11394592, + "step": 182 + }, + { + "epoch": 0.6056572379367721, + "loss": 1.3619801998138428, + "loss_ce": 0.004070098511874676, + "loss_iou": 0.421875, + "loss_num": 0.1025390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 11394592, + "step": 182 + }, + { + "epoch": 0.6089850249584027, + "grad_norm": 14.351191520690918, + "learning_rate": 5e-06, + "loss": 1.3834, + "num_input_tokens_seen": 11457912, + "step": 183 + }, + { + "epoch": 0.6089850249584027, + "loss": 1.633519172668457, + "loss_ce": 0.019261367619037628, + "loss_iou": 0.55078125, + "loss_num": 0.10302734375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 11457912, + "step": 183 + }, + { + "epoch": 0.6123128119800333, + "grad_norm": 16.637283325195312, + "learning_rate": 5e-06, + "loss": 1.23, + "num_input_tokens_seen": 11521492, + "step": 184 + }, + { + "epoch": 0.6123128119800333, + "loss": 1.2156696319580078, + "loss_ce": 0.05990792065858841, + "loss_iou": 0.3359375, + "loss_num": 0.09716796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 11521492, + "step": 184 + }, + { + "epoch": 0.6156405990016639, + "grad_norm": 34.948036193847656, + "learning_rate": 5e-06, + "loss": 1.5363, + "num_input_tokens_seen": 11584208, + "step": 185 + }, + { + "epoch": 0.6156405990016639, + "loss": 1.6890095472335815, + "loss_ce": 0.034224435687065125, + "loss_iou": 0.5859375, + "loss_num": 0.09619140625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 11584208, + "step": 185 + }, + { + "epoch": 0.6189683860232945, + "grad_norm": 14.760610580444336, + "learning_rate": 5e-06, + "loss": 1.129, + "num_input_tokens_seen": 11643560, + "step": 186 + }, + { + "epoch": 0.6189683860232945, + "loss": 1.1673153638839722, + "loss_ce": 0.0008114665979519486, + "loss_iou": 0.2734375, + "loss_num": 0.1240234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 11643560, + "step": 186 + }, + { + "epoch": 0.6222961730449251, + "grad_norm": 16.94452667236328, + "learning_rate": 5e-06, + "loss": 1.2258, + "num_input_tokens_seen": 11705768, + "step": 187 + }, + { + "epoch": 0.6222961730449251, + "loss": 1.3460886478424072, + "loss_ce": 0.0033152345567941666, + "loss_iou": 0.4375, + "loss_num": 0.09326171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 11705768, + "step": 187 + }, + { + "epoch": 0.6256239600665557, + "grad_norm": 11.506011009216309, + "learning_rate": 5e-06, + "loss": 1.3508, + "num_input_tokens_seen": 11766764, + "step": 188 + }, + { + "epoch": 0.6256239600665557, + "loss": 1.2646206617355347, + "loss_ce": 0.04477202519774437, + "loss_iou": 0.337890625, + "loss_num": 0.10888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 11766764, + "step": 188 + }, + { + "epoch": 0.6289517470881864, + "grad_norm": 13.971025466918945, + "learning_rate": 5e-06, + "loss": 1.395, + "num_input_tokens_seen": 11829848, + "step": 189 + }, + { + "epoch": 0.6289517470881864, + "loss": 1.7523114681243896, + "loss_ce": 0.011588791385293007, + "loss_iou": 0.5390625, + "loss_num": 0.1328125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 11829848, + "step": 189 + }, + { + "epoch": 0.632279534109817, + "grad_norm": 34.798828125, + "learning_rate": 5e-06, + "loss": 1.3202, + "num_input_tokens_seen": 11893144, + "step": 190 + }, + { + "epoch": 0.632279534109817, + "loss": 1.4185304641723633, + "loss_ce": 0.0669679343700409, + "loss_iou": 0.369140625, + "loss_num": 0.123046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 11893144, + "step": 190 + }, + { + "epoch": 0.6356073211314476, + "grad_norm": 23.807941436767578, + "learning_rate": 5e-06, + "loss": 1.3981, + "num_input_tokens_seen": 11956764, + "step": 191 + }, + { + "epoch": 0.6356073211314476, + "loss": 1.4876669645309448, + "loss_ce": 0.012813407927751541, + "loss_iou": 0.46484375, + "loss_num": 0.10888671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 11956764, + "step": 191 + }, + { + "epoch": 0.6389351081530782, + "grad_norm": 10.772053718566895, + "learning_rate": 5e-06, + "loss": 1.3734, + "num_input_tokens_seen": 12020672, + "step": 192 + }, + { + "epoch": 0.6389351081530782, + "loss": 1.2527387142181396, + "loss_ce": 0.014945641160011292, + "loss_iou": 0.40234375, + "loss_num": 0.08642578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 12020672, + "step": 192 + }, + { + "epoch": 0.6422628951747088, + "grad_norm": 16.477436065673828, + "learning_rate": 5e-06, + "loss": 1.3591, + "num_input_tokens_seen": 12083392, + "step": 193 + }, + { + "epoch": 0.6422628951747088, + "loss": 1.431098461151123, + "loss_ce": 0.002875822363421321, + "loss_iou": 0.404296875, + "loss_num": 0.12451171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 12083392, + "step": 193 + }, + { + "epoch": 0.6455906821963394, + "grad_norm": 12.675455093383789, + "learning_rate": 5e-06, + "loss": 1.3098, + "num_input_tokens_seen": 12146444, + "step": 194 + }, + { + "epoch": 0.6455906821963394, + "loss": 1.375436782836914, + "loss_ce": 0.015329442918300629, + "loss_iou": 0.392578125, + "loss_num": 0.115234375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 12146444, + "step": 194 + }, + { + "epoch": 0.64891846921797, + "grad_norm": 15.562612533569336, + "learning_rate": 5e-06, + "loss": 1.0385, + "num_input_tokens_seen": 12208136, + "step": 195 + }, + { + "epoch": 0.64891846921797, + "loss": 1.0402494668960571, + "loss_ce": 0.0009428322082385421, + "loss_iou": 0.314453125, + "loss_num": 0.08203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 12208136, + "step": 195 + }, + { + "epoch": 0.6522462562396006, + "grad_norm": 28.349838256835938, + "learning_rate": 5e-06, + "loss": 1.197, + "num_input_tokens_seen": 12270244, + "step": 196 + }, + { + "epoch": 0.6522462562396006, + "loss": 1.0950136184692383, + "loss_ce": 0.006146472413092852, + "loss_iou": 0.345703125, + "loss_num": 0.07958984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 12270244, + "step": 196 + }, + { + "epoch": 0.6555740432612313, + "grad_norm": 16.476137161254883, + "learning_rate": 5e-06, + "loss": 1.3331, + "num_input_tokens_seen": 12333024, + "step": 197 + }, + { + "epoch": 0.6555740432612313, + "loss": 1.3048908710479736, + "loss_ce": 0.005574405658990145, + "loss_iou": 0.34375, + "loss_num": 0.1220703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 12333024, + "step": 197 + }, + { + "epoch": 0.6589018302828619, + "grad_norm": 13.58584976196289, + "learning_rate": 5e-06, + "loss": 1.2588, + "num_input_tokens_seen": 12396340, + "step": 198 + }, + { + "epoch": 0.6589018302828619, + "loss": 1.4304707050323486, + "loss_ce": 0.005665954202413559, + "loss_iou": 0.42578125, + "loss_num": 0.115234375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 12396340, + "step": 198 + }, + { + "epoch": 0.6622296173044925, + "grad_norm": 38.477630615234375, + "learning_rate": 5e-06, + "loss": 1.3283, + "num_input_tokens_seen": 12459396, + "step": 199 + }, + { + "epoch": 0.6622296173044925, + "loss": 1.2707278728485107, + "loss_ce": 0.000708332285284996, + "loss_iou": 0.337890625, + "loss_num": 0.11865234375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 12459396, + "step": 199 + }, + { + "epoch": 0.6655574043261231, + "grad_norm": 30.41405487060547, + "learning_rate": 5e-06, + "loss": 1.1544, + "num_input_tokens_seen": 12522324, + "step": 200 + }, + { + "epoch": 0.6655574043261231, + "loss": 0.9297307133674622, + "loss_ce": 0.007001231890171766, + "loss_iou": 0.2109375, + "loss_num": 0.10009765625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 12522324, + "step": 200 + }, + { + "epoch": 0.6688851913477537, + "grad_norm": 32.16323471069336, + "learning_rate": 5e-06, + "loss": 1.4879, + "num_input_tokens_seen": 12585900, + "step": 201 + }, + { + "epoch": 0.6688851913477537, + "loss": 1.5877773761749268, + "loss_ce": 0.02771873027086258, + "loss_iou": 0.427734375, + "loss_num": 0.140625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 12585900, + "step": 201 + }, + { + "epoch": 0.6722129783693843, + "grad_norm": 19.184961318969727, + "learning_rate": 5e-06, + "loss": 1.3649, + "num_input_tokens_seen": 12647776, + "step": 202 + }, + { + "epoch": 0.6722129783693843, + "loss": 1.634108066558838, + "loss_ce": 0.015455802902579308, + "loss_iou": 0.515625, + "loss_num": 0.11767578125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 12647776, + "step": 202 + }, + { + "epoch": 0.6755407653910149, + "grad_norm": 13.572768211364746, + "learning_rate": 5e-06, + "loss": 1.0858, + "num_input_tokens_seen": 12708056, + "step": 203 + }, + { + "epoch": 0.6755407653910149, + "loss": 0.7612197995185852, + "loss_ce": 0.04417877644300461, + "loss_iou": 0.10546875, + "loss_num": 0.10107421875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 12708056, + "step": 203 + }, + { + "epoch": 0.6788685524126455, + "grad_norm": 15.890493392944336, + "learning_rate": 5e-06, + "loss": 1.139, + "num_input_tokens_seen": 12770736, + "step": 204 + }, + { + "epoch": 0.6788685524126455, + "loss": 1.2692952156066895, + "loss_ce": 0.0007404821808449924, + "loss_iou": 0.40625, + "loss_num": 0.0908203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 12770736, + "step": 204 + }, + { + "epoch": 0.6821963394342762, + "grad_norm": 25.105133056640625, + "learning_rate": 5e-06, + "loss": 1.4867, + "num_input_tokens_seen": 12834956, + "step": 205 + }, + { + "epoch": 0.6821963394342762, + "loss": 1.2058188915252686, + "loss_ce": 0.002937908982858062, + "loss_iou": 0.421875, + "loss_num": 0.072265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 12834956, + "step": 205 + }, + { + "epoch": 0.6855241264559068, + "grad_norm": 57.6992073059082, + "learning_rate": 5e-06, + "loss": 1.2759, + "num_input_tokens_seen": 12898988, + "step": 206 + }, + { + "epoch": 0.6855241264559068, + "loss": 1.0813333988189697, + "loss_ce": 0.03372599184513092, + "loss_iou": 0.283203125, + "loss_num": 0.09619140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 12898988, + "step": 206 + }, + { + "epoch": 0.6888519134775375, + "grad_norm": 17.233978271484375, + "learning_rate": 5e-06, + "loss": 1.1929, + "num_input_tokens_seen": 12962164, + "step": 207 + }, + { + "epoch": 0.6888519134775375, + "loss": 1.2135835886001587, + "loss_ce": 0.01924763433635235, + "loss_iou": 0.439453125, + "loss_num": 0.0625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 12962164, + "step": 207 + }, + { + "epoch": 0.6921797004991681, + "grad_norm": 16.188570022583008, + "learning_rate": 5e-06, + "loss": 1.3785, + "num_input_tokens_seen": 13024784, + "step": 208 + }, + { + "epoch": 0.6921797004991681, + "loss": 1.215705156326294, + "loss_ce": 0.005255852825939655, + "loss_iou": 0.373046875, + "loss_num": 0.09326171875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 13024784, + "step": 208 + }, + { + "epoch": 0.6955074875207987, + "grad_norm": 23.61080551147461, + "learning_rate": 5e-06, + "loss": 1.3336, + "num_input_tokens_seen": 13087760, + "step": 209 + }, + { + "epoch": 0.6955074875207987, + "loss": 1.5246670246124268, + "loss_ce": 0.002206052653491497, + "loss_iou": 0.484375, + "loss_num": 0.1103515625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 13087760, + "step": 209 + }, + { + "epoch": 0.6988352745424293, + "grad_norm": 13.288503646850586, + "learning_rate": 5e-06, + "loss": 1.1365, + "num_input_tokens_seen": 13150884, + "step": 210 + }, + { + "epoch": 0.6988352745424293, + "loss": 1.2659443616867065, + "loss_ce": 0.018141645938158035, + "loss_iou": 0.40234375, + "loss_num": 0.0888671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 13150884, + "step": 210 + }, + { + "epoch": 0.7021630615640599, + "grad_norm": 30.597015380859375, + "learning_rate": 5e-06, + "loss": 1.4653, + "num_input_tokens_seen": 13214276, + "step": 211 + }, + { + "epoch": 0.7021630615640599, + "loss": 1.5506691932678223, + "loss_ce": 0.01111849956214428, + "loss_iou": 0.470703125, + "loss_num": 0.1201171875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 13214276, + "step": 211 + }, + { + "epoch": 0.7054908485856906, + "grad_norm": 23.87419319152832, + "learning_rate": 5e-06, + "loss": 1.1347, + "num_input_tokens_seen": 13276424, + "step": 212 + }, + { + "epoch": 0.7054908485856906, + "loss": 1.1488089561462402, + "loss_ce": 0.0018362791743129492, + "loss_iou": 0.36328125, + "loss_num": 0.08447265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 13276424, + "step": 212 + }, + { + "epoch": 0.7088186356073212, + "grad_norm": 16.2000732421875, + "learning_rate": 5e-06, + "loss": 1.2645, + "num_input_tokens_seen": 13339488, + "step": 213 + }, + { + "epoch": 0.7088186356073212, + "loss": 1.0974924564361572, + "loss_ce": 0.0013010293478146195, + "loss_iou": 0.3359375, + "loss_num": 0.0849609375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 13339488, + "step": 213 + }, + { + "epoch": 0.7121464226289518, + "grad_norm": 34.518123626708984, + "learning_rate": 5e-06, + "loss": 1.0251, + "num_input_tokens_seen": 13402452, + "step": 214 + }, + { + "epoch": 0.7121464226289518, + "loss": 1.091152548789978, + "loss_ce": 0.0013087954139336944, + "loss_iou": 0.3359375, + "loss_num": 0.083984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 13402452, + "step": 214 + }, + { + "epoch": 0.7154742096505824, + "grad_norm": 13.655023574829102, + "learning_rate": 5e-06, + "loss": 1.157, + "num_input_tokens_seen": 13465432, + "step": 215 + }, + { + "epoch": 0.7154742096505824, + "loss": 1.149668574333191, + "loss_ce": 0.00928770937025547, + "loss_iou": 0.33984375, + "loss_num": 0.09228515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 13465432, + "step": 215 + }, + { + "epoch": 0.718801996672213, + "grad_norm": 25.365924835205078, + "learning_rate": 5e-06, + "loss": 1.125, + "num_input_tokens_seen": 13528332, + "step": 216 + }, + { + "epoch": 0.718801996672213, + "loss": 1.1836438179016113, + "loss_ce": 0.13774539530277252, + "loss_iou": 0.294921875, + "loss_num": 0.0908203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 13528332, + "step": 216 + }, + { + "epoch": 0.7221297836938436, + "grad_norm": 27.475000381469727, + "learning_rate": 5e-06, + "loss": 1.4337, + "num_input_tokens_seen": 13590456, + "step": 217 + }, + { + "epoch": 0.7221297836938436, + "loss": 1.3032087087631226, + "loss_ce": 0.01927315816283226, + "loss_iou": 0.40625, + "loss_num": 0.09375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 13590456, + "step": 217 + }, + { + "epoch": 0.7254575707154742, + "grad_norm": 26.22903823852539, + "learning_rate": 5e-06, + "loss": 1.1997, + "num_input_tokens_seen": 13653520, + "step": 218 + }, + { + "epoch": 0.7254575707154742, + "loss": 1.0637105703353882, + "loss_ce": 0.0012105784844607115, + "loss_iou": 0.30859375, + "loss_num": 0.0888671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 13653520, + "step": 218 + }, + { + "epoch": 0.7287853577371048, + "grad_norm": 21.157546997070312, + "learning_rate": 5e-06, + "loss": 1.2113, + "num_input_tokens_seen": 13717416, + "step": 219 + }, + { + "epoch": 0.7287853577371048, + "loss": 1.2450811862945557, + "loss_ce": 0.006799900438636541, + "loss_iou": 0.408203125, + "loss_num": 0.08447265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 13717416, + "step": 219 + }, + { + "epoch": 0.7321131447587355, + "grad_norm": 16.073261260986328, + "learning_rate": 5e-06, + "loss": 1.3171, + "num_input_tokens_seen": 13780356, + "step": 220 + }, + { + "epoch": 0.7321131447587355, + "loss": 1.1784981489181519, + "loss_ce": 0.005646565929055214, + "loss_iou": 0.400390625, + "loss_num": 0.07470703125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 13780356, + "step": 220 + }, + { + "epoch": 0.7354409317803661, + "grad_norm": 9.423686027526855, + "learning_rate": 5e-06, + "loss": 1.4231, + "num_input_tokens_seen": 13843264, + "step": 221 + }, + { + "epoch": 0.7354409317803661, + "loss": 1.6099144220352173, + "loss_ce": 0.0015159165486693382, + "loss_iou": 0.484375, + "loss_num": 0.1279296875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 13843264, + "step": 221 + }, + { + "epoch": 0.7387687188019967, + "grad_norm": 13.743108749389648, + "learning_rate": 5e-06, + "loss": 1.1927, + "num_input_tokens_seen": 13905668, + "step": 222 + }, + { + "epoch": 0.7387687188019967, + "loss": 1.1869020462036133, + "loss_ce": 0.001599297858774662, + "loss_iou": 0.40234375, + "loss_num": 0.07666015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 13905668, + "step": 222 + }, + { + "epoch": 0.7420965058236273, + "grad_norm": 31.587129592895508, + "learning_rate": 5e-06, + "loss": 1.3488, + "num_input_tokens_seen": 13968100, + "step": 223 + }, + { + "epoch": 0.7420965058236273, + "loss": 1.445157527923584, + "loss_ce": 0.01302863098680973, + "loss_iou": 0.45703125, + "loss_num": 0.10400390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 13968100, + "step": 223 + }, + { + "epoch": 0.7454242928452579, + "grad_norm": 11.37446403503418, + "learning_rate": 5e-06, + "loss": 1.1052, + "num_input_tokens_seen": 14029952, + "step": 224 + }, + { + "epoch": 0.7454242928452579, + "loss": 1.2456074953079224, + "loss_ce": 0.0007344639161601663, + "loss_iou": 0.416015625, + "loss_num": 0.08251953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 14029952, + "step": 224 + }, + { + "epoch": 0.7487520798668885, + "grad_norm": 16.84494972229004, + "learning_rate": 5e-06, + "loss": 1.301, + "num_input_tokens_seen": 14093280, + "step": 225 + }, + { + "epoch": 0.7487520798668885, + "loss": 1.3011748790740967, + "loss_ce": 0.015042103826999664, + "loss_iou": 0.330078125, + "loss_num": 0.12451171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 14093280, + "step": 225 + }, + { + "epoch": 0.7520798668885191, + "grad_norm": 9.384833335876465, + "learning_rate": 5e-06, + "loss": 1.2195, + "num_input_tokens_seen": 14157284, + "step": 226 + }, + { + "epoch": 0.7520798668885191, + "loss": 1.217372179031372, + "loss_ce": 0.0010636431397870183, + "loss_iou": 0.33203125, + "loss_num": 0.11083984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 14157284, + "step": 226 + }, + { + "epoch": 0.7554076539101497, + "grad_norm": 15.191215515136719, + "learning_rate": 5e-06, + "loss": 1.1685, + "num_input_tokens_seen": 14219260, + "step": 227 + }, + { + "epoch": 0.7554076539101497, + "loss": 1.1222490072250366, + "loss_ce": 0.015803655609488487, + "loss_iou": 0.369140625, + "loss_num": 0.07373046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 14219260, + "step": 227 + }, + { + "epoch": 0.7587354409317804, + "grad_norm": 24.131282806396484, + "learning_rate": 5e-06, + "loss": 1.1008, + "num_input_tokens_seen": 14281308, + "step": 228 + }, + { + "epoch": 0.7587354409317804, + "loss": 1.2230792045593262, + "loss_ce": 0.0006670955335721374, + "loss_iou": 0.36328125, + "loss_num": 0.09912109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 14281308, + "step": 228 + }, + { + "epoch": 0.762063227953411, + "grad_norm": 14.669888496398926, + "learning_rate": 5e-06, + "loss": 1.1266, + "num_input_tokens_seen": 14343964, + "step": 229 + }, + { + "epoch": 0.762063227953411, + "loss": 1.3784270286560059, + "loss_ce": 0.008309826254844666, + "loss_iou": 0.462890625, + "loss_num": 0.08935546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 14343964, + "step": 229 + }, + { + "epoch": 0.7653910149750416, + "grad_norm": 18.83928871154785, + "learning_rate": 5e-06, + "loss": 1.3428, + "num_input_tokens_seen": 14407128, + "step": 230 + }, + { + "epoch": 0.7653910149750416, + "loss": 1.1617732048034668, + "loss_ce": 0.0025936225429177284, + "loss_iou": 0.361328125, + "loss_num": 0.0869140625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 14407128, + "step": 230 + }, + { + "epoch": 0.7687188019966722, + "grad_norm": 14.189291000366211, + "learning_rate": 5e-06, + "loss": 0.9483, + "num_input_tokens_seen": 14470136, + "step": 231 + }, + { + "epoch": 0.7687188019966722, + "loss": 0.9246655702590942, + "loss_ce": 0.01060311309993267, + "loss_iou": 0.298828125, + "loss_num": 0.0634765625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 14470136, + "step": 231 + }, + { + "epoch": 0.7720465890183028, + "grad_norm": 14.045916557312012, + "learning_rate": 5e-06, + "loss": 0.8905, + "num_input_tokens_seen": 14532512, + "step": 232 + }, + { + "epoch": 0.7720465890183028, + "loss": 0.65977543592453, + "loss_ce": 0.014023483730852604, + "loss_iou": 0.166015625, + "loss_num": 0.06298828125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 14532512, + "step": 232 + }, + { + "epoch": 0.7753743760399334, + "grad_norm": 20.161911010742188, + "learning_rate": 5e-06, + "loss": 1.3524, + "num_input_tokens_seen": 14594204, + "step": 233 + }, + { + "epoch": 0.7753743760399334, + "loss": 1.3588159084320068, + "loss_ce": 0.006765137426555157, + "loss_iou": 0.46484375, + "loss_num": 0.083984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 14594204, + "step": 233 + }, + { + "epoch": 0.778702163061564, + "grad_norm": 12.568875312805176, + "learning_rate": 5e-06, + "loss": 1.1444, + "num_input_tokens_seen": 14656860, + "step": 234 + }, + { + "epoch": 0.778702163061564, + "loss": 0.9393011331558228, + "loss_ce": 0.014252342283725739, + "loss_iou": 0.296875, + "loss_num": 0.06640625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 14656860, + "step": 234 + }, + { + "epoch": 0.7820299500831946, + "grad_norm": 21.507532119750977, + "learning_rate": 5e-06, + "loss": 1.3511, + "num_input_tokens_seen": 14720468, + "step": 235 + }, + { + "epoch": 0.7820299500831946, + "loss": 1.420245885848999, + "loss_ce": 0.011066140606999397, + "loss_iou": 0.482421875, + "loss_num": 0.0888671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 14720468, + "step": 235 + }, + { + "epoch": 0.7853577371048253, + "grad_norm": 33.57356643676758, + "learning_rate": 5e-06, + "loss": 1.4398, + "num_input_tokens_seen": 14784184, + "step": 236 + }, + { + "epoch": 0.7853577371048253, + "loss": 1.4989573955535889, + "loss_ce": 0.0023754474241286516, + "loss_iou": 0.458984375, + "loss_num": 0.115234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 14784184, + "step": 236 + }, + { + "epoch": 0.7886855241264559, + "grad_norm": 13.441980361938477, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 14846184, + "step": 237 + }, + { + "epoch": 0.7886855241264559, + "loss": 0.9496654272079468, + "loss_ce": 0.007770880591124296, + "loss_iou": 0.259765625, + "loss_num": 0.083984375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 14846184, + "step": 237 + }, + { + "epoch": 0.7920133111480865, + "grad_norm": 23.40178871154785, + "learning_rate": 5e-06, + "loss": 1.1604, + "num_input_tokens_seen": 14908792, + "step": 238 + }, + { + "epoch": 0.7920133111480865, + "loss": 1.1228361129760742, + "loss_ce": 0.0007658317917957902, + "loss_iou": 0.31640625, + "loss_num": 0.09814453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 14908792, + "step": 238 + }, + { + "epoch": 0.7953410981697171, + "grad_norm": 29.678503036499023, + "learning_rate": 5e-06, + "loss": 1.4377, + "num_input_tokens_seen": 14971520, + "step": 239 + }, + { + "epoch": 0.7953410981697171, + "loss": 1.4047870635986328, + "loss_ce": 0.006837797816842794, + "loss_iou": 0.50390625, + "loss_num": 0.07763671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 14971520, + "step": 239 + }, + { + "epoch": 0.7986688851913477, + "grad_norm": 20.842504501342773, + "learning_rate": 5e-06, + "loss": 1.1844, + "num_input_tokens_seen": 15033176, + "step": 240 + }, + { + "epoch": 0.7986688851913477, + "loss": 1.3855491876602173, + "loss_ce": 0.02422105148434639, + "loss_iou": 0.416015625, + "loss_num": 0.10498046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 15033176, + "step": 240 + }, + { + "epoch": 0.8019966722129783, + "grad_norm": 9.68075942993164, + "learning_rate": 5e-06, + "loss": 1.3038, + "num_input_tokens_seen": 15094324, + "step": 241 + }, + { + "epoch": 0.8019966722129783, + "loss": 1.2901484966278076, + "loss_ce": 0.0098751625046134, + "loss_iou": 0.359375, + "loss_num": 0.1123046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 15094324, + "step": 241 + }, + { + "epoch": 0.8053244592346089, + "grad_norm": 14.69822883605957, + "learning_rate": 5e-06, + "loss": 1.2158, + "num_input_tokens_seen": 15156516, + "step": 242 + }, + { + "epoch": 0.8053244592346089, + "loss": 1.2247101068496704, + "loss_ce": 0.02549135871231556, + "loss_iou": 0.33203125, + "loss_num": 0.1064453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 15156516, + "step": 242 + }, + { + "epoch": 0.8086522462562395, + "grad_norm": 16.155757904052734, + "learning_rate": 5e-06, + "loss": 1.2242, + "num_input_tokens_seen": 15218404, + "step": 243 + }, + { + "epoch": 0.8086522462562395, + "loss": 1.1564106941223145, + "loss_ce": 0.015785671770572662, + "loss_iou": 0.375, + "loss_num": 0.07763671875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 15218404, + "step": 243 + }, + { + "epoch": 0.8119800332778702, + "grad_norm": 13.781820297241211, + "learning_rate": 5e-06, + "loss": 1.1725, + "num_input_tokens_seen": 15282228, + "step": 244 + }, + { + "epoch": 0.8119800332778702, + "loss": 1.1227807998657227, + "loss_ce": 0.017311980947852135, + "loss_iou": 0.384765625, + "loss_num": 0.0673828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 15282228, + "step": 244 + }, + { + "epoch": 0.8153078202995009, + "grad_norm": 15.639060020446777, + "learning_rate": 5e-06, + "loss": 1.1997, + "num_input_tokens_seen": 15345232, + "step": 245 + }, + { + "epoch": 0.8153078202995009, + "loss": 1.175334095954895, + "loss_ce": 0.0005293773720040917, + "loss_iou": 0.408203125, + "loss_num": 0.07177734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 15345232, + "step": 245 + }, + { + "epoch": 0.8186356073211315, + "grad_norm": 20.45536994934082, + "learning_rate": 5e-06, + "loss": 1.2539, + "num_input_tokens_seen": 15408764, + "step": 246 + }, + { + "epoch": 0.8186356073211315, + "loss": 1.2810962200164795, + "loss_ce": 0.00033442690619267523, + "loss_iou": 0.44921875, + "loss_num": 0.076171875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 15408764, + "step": 246 + }, + { + "epoch": 0.8219633943427621, + "grad_norm": 12.981587409973145, + "learning_rate": 5e-06, + "loss": 1.0421, + "num_input_tokens_seen": 15471008, + "step": 247 + }, + { + "epoch": 0.8219633943427621, + "loss": 1.138602375984192, + "loss_ce": 0.00383676472119987, + "loss_iou": 0.38671875, + "loss_num": 0.072265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 15471008, + "step": 247 + }, + { + "epoch": 0.8252911813643927, + "grad_norm": 51.6541633605957, + "learning_rate": 5e-06, + "loss": 0.8726, + "num_input_tokens_seen": 15533376, + "step": 248 + }, + { + "epoch": 0.8252911813643927, + "loss": 0.745193362236023, + "loss_ce": 0.01130670215934515, + "loss_iou": 0.1748046875, + "loss_num": 0.07666015625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 15533376, + "step": 248 + }, + { + "epoch": 0.8286189683860233, + "grad_norm": 20.74786949157715, + "learning_rate": 5e-06, + "loss": 1.274, + "num_input_tokens_seen": 15596260, + "step": 249 + }, + { + "epoch": 0.8286189683860233, + "loss": 0.9992030262947083, + "loss_ce": 0.005550671834498644, + "loss_iou": 0.2236328125, + "loss_num": 0.109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 15596260, + "step": 249 + }, + { + "epoch": 0.831946755407654, + "grad_norm": 60.06270980834961, + "learning_rate": 5e-06, + "loss": 1.4534, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "eval_seeclick_CIoU": 0.12590423226356506, + "eval_seeclick_GIoU": 0.17364877462387085, + "eval_seeclick_IoU": 0.24328559637069702, + "eval_seeclick_MAE_all": 0.18153271079063416, + "eval_seeclick_MAE_h": 0.07490194030106068, + "eval_seeclick_MAE_w": 0.10936548560857773, + "eval_seeclick_MAE_x_boxes": 0.31923606991767883, + "eval_seeclick_MAE_y_boxes": 0.09831257537007332, + "eval_seeclick_NUM_probability": 0.9999359548091888, + "eval_seeclick_inside_bbox": 0.30520834028720856, + "eval_seeclick_loss": 2.6519570350646973, + "eval_seeclick_loss_ce": 0.03889298997819424, + "eval_seeclick_loss_iou": 0.8387451171875, + "eval_seeclick_loss_num": 0.1761322021484375, + "eval_seeclick_loss_xval": 2.558349609375, + "eval_seeclick_runtime": 70.0545, + "eval_seeclick_samples_per_second": 0.671, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "eval_icons_CIoU": 0.07106863334774971, + "eval_icons_GIoU": 0.19157247245311737, + "eval_icons_IoU": 0.21291274577379227, + "eval_icons_MAE_all": 0.1373068317770958, + "eval_icons_MAE_h": 0.07063675299286842, + "eval_icons_MAE_w": 0.13146401941776276, + "eval_icons_MAE_x_boxes": 0.1353582739830017, + "eval_icons_MAE_y_boxes": 0.05130962934345007, + "eval_icons_NUM_probability": 0.9999799430370331, + "eval_icons_inside_bbox": 0.2986111119389534, + "eval_icons_loss": 2.3059701919555664, + "eval_icons_loss_ce": 2.8694971661025193e-05, + "eval_icons_loss_iou": 0.806396484375, + "eval_icons_loss_num": 0.14170265197753906, + "eval_icons_loss_xval": 2.322265625, + "eval_icons_runtime": 66.8297, + "eval_icons_samples_per_second": 0.748, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "eval_screenspot_CIoU": 0.023540629694859188, + "eval_screenspot_GIoU": 0.07850407063961029, + "eval_screenspot_IoU": 0.1775170018275579, + "eval_screenspot_MAE_all": 0.22443277140458426, + "eval_screenspot_MAE_h": 0.09910715123017629, + "eval_screenspot_MAE_w": 0.17734555900096893, + "eval_screenspot_MAE_x_boxes": 0.31622066100438434, + "eval_screenspot_MAE_y_boxes": 0.13959191491206488, + "eval_screenspot_NUM_probability": 0.9998429814974467, + "eval_screenspot_inside_bbox": 0.2912500003973643, + "eval_screenspot_loss": 2.9710144996643066, + "eval_screenspot_loss_ce": 0.0023049935698509216, + "eval_screenspot_loss_iou": 0.92724609375, + "eval_screenspot_loss_num": 0.23094685872395834, + "eval_screenspot_loss_xval": 3.009765625, + "eval_screenspot_runtime": 119.5082, + "eval_screenspot_samples_per_second": 0.745, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "eval_compot_CIoU": -0.05822424963116646, + "eval_compot_GIoU": 0.039091480895876884, + "eval_compot_IoU": 0.10423949733376503, + "eval_compot_MAE_all": 0.23665092885494232, + "eval_compot_MAE_h": 0.08639108017086983, + "eval_compot_MAE_w": 0.25060585141181946, + "eval_compot_MAE_x_boxes": 0.2275974601507187, + "eval_compot_MAE_y_boxes": 0.14669033139944077, + "eval_compot_NUM_probability": 0.9997861981391907, + "eval_compot_inside_bbox": 0.2204861119389534, + "eval_compot_loss": 3.053682804107666, + "eval_compot_loss_ce": 0.0055244737304747105, + "eval_compot_loss_iou": 0.95751953125, + "eval_compot_loss_num": 0.2315673828125, + "eval_compot_loss_xval": 3.07421875, + "eval_compot_runtime": 67.9773, + "eval_compot_samples_per_second": 0.736, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "eval_custom_ui_MAE_all": 0.11324165761470795, + "eval_custom_ui_MAE_x": 0.11285695433616638, + "eval_custom_ui_MAE_y": 0.11362636089324951, + "eval_custom_ui_NUM_probability": 0.9999939203262329, + "eval_custom_ui_loss": 0.5810784697532654, + "eval_custom_ui_loss_ce": 0.01076856441795826, + "eval_custom_ui_loss_num": 0.1113433837890625, + "eval_custom_ui_loss_xval": 0.556640625, + "eval_custom_ui_runtime": 57.5498, + "eval_custom_ui_samples_per_second": 0.869, + "eval_custom_ui_steps_per_second": 0.035, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.831946755407654, + "loss": 0.5382449626922607, + "loss_ce": 0.010657099075615406, + "loss_iou": 0.0, + "loss_num": 0.10546875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 15658848, + "step": 250 + }, + { + "epoch": 0.8352745424292846, + "grad_norm": 42.612083435058594, + "learning_rate": 5e-06, + "loss": 1.1362, + "num_input_tokens_seen": 15721148, + "step": 251 + }, + { + "epoch": 0.8352745424292846, + "loss": 1.1814727783203125, + "loss_ce": 0.0027619153261184692, + "loss_iou": 0.353515625, + "loss_num": 0.09423828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 15721148, + "step": 251 + }, + { + "epoch": 0.8386023294509152, + "grad_norm": 13.122926712036133, + "learning_rate": 5e-06, + "loss": 1.0092, + "num_input_tokens_seen": 15783360, + "step": 252 + }, + { + "epoch": 0.8386023294509152, + "loss": 1.0700435638427734, + "loss_ce": 0.0011958942050114274, + "loss_iou": 0.33203125, + "loss_num": 0.08056640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 15783360, + "step": 252 + }, + { + "epoch": 0.8419301164725458, + "grad_norm": 21.410629272460938, + "learning_rate": 5e-06, + "loss": 1.1255, + "num_input_tokens_seen": 15845752, + "step": 253 + }, + { + "epoch": 0.8419301164725458, + "loss": 1.3312301635742188, + "loss_ce": 0.004081668332219124, + "loss_iou": 0.416015625, + "loss_num": 0.09814453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 15845752, + "step": 253 + }, + { + "epoch": 0.8452579034941764, + "grad_norm": 15.538365364074707, + "learning_rate": 5e-06, + "loss": 1.2166, + "num_input_tokens_seen": 15908960, + "step": 254 + }, + { + "epoch": 0.8452579034941764, + "loss": 1.152634859085083, + "loss_ce": 0.010545015335083008, + "loss_iou": 0.375, + "loss_num": 0.07861328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 15908960, + "step": 254 + }, + { + "epoch": 0.848585690515807, + "grad_norm": 40.248104095458984, + "learning_rate": 5e-06, + "loss": 0.985, + "num_input_tokens_seen": 15968648, + "step": 255 + }, + { + "epoch": 0.848585690515807, + "loss": 0.7882208824157715, + "loss_ce": 0.02186344563961029, + "loss_iou": 0.1806640625, + "loss_num": 0.0810546875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 15968648, + "step": 255 + }, + { + "epoch": 0.8519134775374376, + "grad_norm": 28.604537963867188, + "learning_rate": 5e-06, + "loss": 1.1575, + "num_input_tokens_seen": 16030956, + "step": 256 + }, + { + "epoch": 0.8519134775374376, + "loss": 1.093517780303955, + "loss_ce": 0.017590083181858063, + "loss_iou": 0.287109375, + "loss_num": 0.1005859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 16030956, + "step": 256 + }, + { + "epoch": 0.8552412645590682, + "grad_norm": 19.028539657592773, + "learning_rate": 5e-06, + "loss": 1.1595, + "num_input_tokens_seen": 16092152, + "step": 257 + }, + { + "epoch": 0.8552412645590682, + "loss": 1.3261454105377197, + "loss_ce": 0.008274221792817116, + "loss_iou": 0.37890625, + "loss_num": 0.11181640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 16092152, + "step": 257 + }, + { + "epoch": 0.8585690515806988, + "grad_norm": 11.434488296508789, + "learning_rate": 5e-06, + "loss": 1.0794, + "num_input_tokens_seen": 16155380, + "step": 258 + }, + { + "epoch": 0.8585690515806988, + "loss": 1.0630097389221191, + "loss_ce": 0.01808791421353817, + "loss_iou": 0.34765625, + "loss_num": 0.06982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 16155380, + "step": 258 + }, + { + "epoch": 0.8618968386023295, + "grad_norm": 8.151037216186523, + "learning_rate": 5e-06, + "loss": 0.9681, + "num_input_tokens_seen": 16216940, + "step": 259 + }, + { + "epoch": 0.8618968386023295, + "loss": 0.9589591026306152, + "loss_ce": 0.0010733783710747957, + "loss_iou": 0.2578125, + "loss_num": 0.08837890625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 16216940, + "step": 259 + }, + { + "epoch": 0.8652246256239601, + "grad_norm": 19.071046829223633, + "learning_rate": 5e-06, + "loss": 1.4472, + "num_input_tokens_seen": 16280368, + "step": 260 + }, + { + "epoch": 0.8652246256239601, + "loss": 1.5674210786819458, + "loss_ce": 0.09525313228368759, + "loss_iou": 0.466796875, + "loss_num": 0.107421875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 16280368, + "step": 260 + }, + { + "epoch": 0.8685524126455907, + "grad_norm": 19.866905212402344, + "learning_rate": 5e-06, + "loss": 0.9902, + "num_input_tokens_seen": 16343184, + "step": 261 + }, + { + "epoch": 0.8685524126455907, + "loss": 0.8425556421279907, + "loss_ce": 0.005885718856006861, + "loss_iou": 0.2353515625, + "loss_num": 0.0732421875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 16343184, + "step": 261 + }, + { + "epoch": 0.8718801996672213, + "grad_norm": 26.4387264251709, + "learning_rate": 5e-06, + "loss": 1.1488, + "num_input_tokens_seen": 16405096, + "step": 262 + }, + { + "epoch": 0.8718801996672213, + "loss": 1.1173707246780396, + "loss_ce": 0.0014038896188139915, + "loss_iou": 0.33203125, + "loss_num": 0.09033203125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 16405096, + "step": 262 + }, + { + "epoch": 0.8752079866888519, + "grad_norm": 44.49690246582031, + "learning_rate": 5e-06, + "loss": 1.1406, + "num_input_tokens_seen": 16468060, + "step": 263 + }, + { + "epoch": 0.8752079866888519, + "loss": 1.2327797412872314, + "loss_ce": 0.004508184269070625, + "loss_iou": 0.388671875, + "loss_num": 0.0908203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 16468060, + "step": 263 + }, + { + "epoch": 0.8785357737104825, + "grad_norm": 26.186443328857422, + "learning_rate": 5e-06, + "loss": 1.6099, + "num_input_tokens_seen": 16532308, + "step": 264 + }, + { + "epoch": 0.8785357737104825, + "loss": 1.6475975513458252, + "loss_ce": 0.0040428778156638145, + "loss_iou": 0.55078125, + "loss_num": 0.1083984375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 16532308, + "step": 264 + }, + { + "epoch": 0.8818635607321131, + "grad_norm": 16.094806671142578, + "learning_rate": 5e-06, + "loss": 1.2654, + "num_input_tokens_seen": 16595464, + "step": 265 + }, + { + "epoch": 0.8818635607321131, + "loss": 1.439080834388733, + "loss_ce": 0.015496812760829926, + "loss_iou": 0.431640625, + "loss_num": 0.1123046875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 16595464, + "step": 265 + }, + { + "epoch": 0.8851913477537438, + "grad_norm": 13.568002700805664, + "learning_rate": 5e-06, + "loss": 1.3184, + "num_input_tokens_seen": 16659512, + "step": 266 + }, + { + "epoch": 0.8851913477537438, + "loss": 1.2902538776397705, + "loss_ce": 0.0011914295610040426, + "loss_iou": 0.392578125, + "loss_num": 0.10107421875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 16659512, + "step": 266 + }, + { + "epoch": 0.8885191347753744, + "grad_norm": 17.312501907348633, + "learning_rate": 5e-06, + "loss": 1.2592, + "num_input_tokens_seen": 16723264, + "step": 267 + }, + { + "epoch": 0.8885191347753744, + "loss": 1.0298858880996704, + "loss_ce": 0.006448333151638508, + "loss_iou": 0.30859375, + "loss_num": 0.0810546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 16723264, + "step": 267 + }, + { + "epoch": 0.891846921797005, + "grad_norm": 11.952759742736816, + "learning_rate": 5e-06, + "loss": 1.1869, + "num_input_tokens_seen": 16786628, + "step": 268 + }, + { + "epoch": 0.891846921797005, + "loss": 1.2707010507583618, + "loss_ce": 0.002634685719385743, + "loss_iou": 0.41015625, + "loss_num": 0.08984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 16786628, + "step": 268 + }, + { + "epoch": 0.8951747088186356, + "grad_norm": 81.69204711914062, + "learning_rate": 5e-06, + "loss": 1.0619, + "num_input_tokens_seen": 16848308, + "step": 269 + }, + { + "epoch": 0.8951747088186356, + "loss": 1.1395946741104126, + "loss_ce": 0.0033642093185335398, + "loss_iou": 0.287109375, + "loss_num": 0.1123046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 16848308, + "step": 269 + }, + { + "epoch": 0.8985024958402662, + "grad_norm": 17.892982482910156, + "learning_rate": 5e-06, + "loss": 1.0715, + "num_input_tokens_seen": 16910492, + "step": 270 + }, + { + "epoch": 0.8985024958402662, + "loss": 0.9769110679626465, + "loss_ce": 0.0023017562925815582, + "loss_iou": 0.265625, + "loss_num": 0.0888671875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 16910492, + "step": 270 + }, + { + "epoch": 0.9018302828618968, + "grad_norm": 14.101139068603516, + "learning_rate": 5e-06, + "loss": 1.093, + "num_input_tokens_seen": 16973704, + "step": 271 + }, + { + "epoch": 0.9018302828618968, + "loss": 0.8828328847885132, + "loss_ce": 0.00368250603787601, + "loss_iou": 0.2421875, + "loss_num": 0.0791015625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 16973704, + "step": 271 + }, + { + "epoch": 0.9051580698835274, + "grad_norm": 17.054306030273438, + "learning_rate": 5e-06, + "loss": 1.1851, + "num_input_tokens_seen": 17037116, + "step": 272 + }, + { + "epoch": 0.9051580698835274, + "loss": 1.2159790992736816, + "loss_ce": 0.0040650563314557076, + "loss_iou": 0.369140625, + "loss_num": 0.0947265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 17037116, + "step": 272 + }, + { + "epoch": 0.908485856905158, + "grad_norm": 19.807126998901367, + "learning_rate": 5e-06, + "loss": 1.0618, + "num_input_tokens_seen": 17098136, + "step": 273 + }, + { + "epoch": 0.908485856905158, + "loss": 1.046678066253662, + "loss_ce": 0.020799197256565094, + "loss_iou": 0.2080078125, + "loss_num": 0.1220703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 17098136, + "step": 273 + }, + { + "epoch": 0.9118136439267887, + "grad_norm": 19.801219940185547, + "learning_rate": 5e-06, + "loss": 1.4729, + "num_input_tokens_seen": 17160376, + "step": 274 + }, + { + "epoch": 0.9118136439267887, + "loss": 1.3921507596969604, + "loss_ce": 0.038146864622831345, + "loss_iou": 0.4375, + "loss_num": 0.095703125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 17160376, + "step": 274 + }, + { + "epoch": 0.9151414309484193, + "grad_norm": 28.975116729736328, + "learning_rate": 5e-06, + "loss": 1.058, + "num_input_tokens_seen": 17223484, + "step": 275 + }, + { + "epoch": 0.9151414309484193, + "loss": 1.0364493131637573, + "loss_ce": 0.0027578980661928654, + "loss_iou": 0.33984375, + "loss_num": 0.0703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 17223484, + "step": 275 + }, + { + "epoch": 0.9184692179700499, + "grad_norm": 16.811128616333008, + "learning_rate": 5e-06, + "loss": 0.8941, + "num_input_tokens_seen": 17285864, + "step": 276 + }, + { + "epoch": 0.9184692179700499, + "loss": 0.9024735689163208, + "loss_ce": 0.01306929811835289, + "loss_iou": 0.236328125, + "loss_num": 0.08349609375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 17285864, + "step": 276 + }, + { + "epoch": 0.9217970049916805, + "grad_norm": 16.22093391418457, + "learning_rate": 5e-06, + "loss": 1.0589, + "num_input_tokens_seen": 17349464, + "step": 277 + }, + { + "epoch": 0.9217970049916805, + "loss": 0.9893227219581604, + "loss_ce": 0.0005531868082471192, + "loss_iou": 0.2451171875, + "loss_num": 0.099609375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 17349464, + "step": 277 + }, + { + "epoch": 0.9251247920133111, + "grad_norm": 12.684296607971191, + "learning_rate": 5e-06, + "loss": 1.1196, + "num_input_tokens_seen": 17412492, + "step": 278 + }, + { + "epoch": 0.9251247920133111, + "loss": 0.9944457411766052, + "loss_ce": 0.0027464870363473892, + "loss_iou": 0.26953125, + "loss_num": 0.09033203125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 17412492, + "step": 278 + }, + { + "epoch": 0.9284525790349417, + "grad_norm": 54.076019287109375, + "learning_rate": 5e-06, + "loss": 1.2471, + "num_input_tokens_seen": 17475792, + "step": 279 + }, + { + "epoch": 0.9284525790349417, + "loss": 1.5113483667373657, + "loss_ce": 0.0035358648747205734, + "loss_iou": 0.453125, + "loss_num": 0.12060546875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 17475792, + "step": 279 + }, + { + "epoch": 0.9317803660565723, + "grad_norm": 46.81892013549805, + "learning_rate": 5e-06, + "loss": 1.3139, + "num_input_tokens_seen": 17539360, + "step": 280 + }, + { + "epoch": 0.9317803660565723, + "loss": 0.9593971967697144, + "loss_ce": 0.013108117505908012, + "loss_iou": 0.3125, + "loss_num": 0.06396484375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 17539360, + "step": 280 + }, + { + "epoch": 0.9351081530782029, + "grad_norm": 26.500545501708984, + "learning_rate": 5e-06, + "loss": 1.0319, + "num_input_tokens_seen": 17602120, + "step": 281 + }, + { + "epoch": 0.9351081530782029, + "loss": 0.7355974316596985, + "loss_ce": 0.0031755524687469006, + "loss_iou": 0.2431640625, + "loss_num": 0.049072265625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 17602120, + "step": 281 + }, + { + "epoch": 0.9384359400998337, + "grad_norm": 26.911073684692383, + "learning_rate": 5e-06, + "loss": 1.0786, + "num_input_tokens_seen": 17665200, + "step": 282 + }, + { + "epoch": 0.9384359400998337, + "loss": 1.0884435176849365, + "loss_ce": 0.0012852513464167714, + "loss_iou": 0.275390625, + "loss_num": 0.107421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 17665200, + "step": 282 + }, + { + "epoch": 0.9417637271214643, + "grad_norm": 10.468093872070312, + "learning_rate": 5e-06, + "loss": 0.7664, + "num_input_tokens_seen": 17727580, + "step": 283 + }, + { + "epoch": 0.9417637271214643, + "loss": 0.8793269991874695, + "loss_ce": 0.0031062541529536247, + "loss_iou": 0.298828125, + "loss_num": 0.055908203125, + "loss_xval": 0.875, + "num_input_tokens_seen": 17727580, + "step": 283 + }, + { + "epoch": 0.9450915141430949, + "grad_norm": 9.961268424987793, + "learning_rate": 5e-06, + "loss": 0.9138, + "num_input_tokens_seen": 17789316, + "step": 284 + }, + { + "epoch": 0.9450915141430949, + "loss": 1.0281856060028076, + "loss_ce": 0.002794938860461116, + "loss_iou": 0.2177734375, + "loss_num": 0.1181640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 17789316, + "step": 284 + }, + { + "epoch": 0.9484193011647255, + "grad_norm": 12.167577743530273, + "learning_rate": 5e-06, + "loss": 1.1296, + "num_input_tokens_seen": 17852072, + "step": 285 + }, + { + "epoch": 0.9484193011647255, + "loss": 1.0344905853271484, + "loss_ce": 0.005193705670535564, + "loss_iou": 0.322265625, + "loss_num": 0.0771484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 17852072, + "step": 285 + }, + { + "epoch": 0.9517470881863561, + "grad_norm": 12.375018119812012, + "learning_rate": 5e-06, + "loss": 1.0284, + "num_input_tokens_seen": 17914960, + "step": 286 + }, + { + "epoch": 0.9517470881863561, + "loss": 0.9589194059371948, + "loss_ce": 0.009944751858711243, + "loss_iou": 0.3125, + "loss_num": 0.06494140625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 17914960, + "step": 286 + }, + { + "epoch": 0.9550748752079867, + "grad_norm": 27.08910369873047, + "learning_rate": 5e-06, + "loss": 1.1156, + "num_input_tokens_seen": 17976848, + "step": 287 + }, + { + "epoch": 0.9550748752079867, + "loss": 0.7211456298828125, + "loss_ce": 0.0006866899784654379, + "loss_iou": 0.1591796875, + "loss_num": 0.08056640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 17976848, + "step": 287 + }, + { + "epoch": 0.9584026622296173, + "grad_norm": 20.21120262145996, + "learning_rate": 5e-06, + "loss": 0.9142, + "num_input_tokens_seen": 18038416, + "step": 288 + }, + { + "epoch": 0.9584026622296173, + "loss": 1.0173907279968262, + "loss_ce": 0.010310713201761246, + "loss_iou": 0.310546875, + "loss_num": 0.0771484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 18038416, + "step": 288 + }, + { + "epoch": 0.961730449251248, + "grad_norm": 25.247774124145508, + "learning_rate": 5e-06, + "loss": 1.3575, + "num_input_tokens_seen": 18101284, + "step": 289 + }, + { + "epoch": 0.961730449251248, + "loss": 1.379800796508789, + "loss_ce": 0.005777302198112011, + "loss_iou": 0.408203125, + "loss_num": 0.111328125, + "loss_xval": 1.375, + "num_input_tokens_seen": 18101284, + "step": 289 + }, + { + "epoch": 0.9650582362728786, + "grad_norm": 18.5815486907959, + "learning_rate": 5e-06, + "loss": 1.1872, + "num_input_tokens_seen": 18163408, + "step": 290 + }, + { + "epoch": 0.9650582362728786, + "loss": 1.0184900760650635, + "loss_ce": 0.000667768414132297, + "loss_iou": 0.26953125, + "loss_num": 0.09619140625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 18163408, + "step": 290 + }, + { + "epoch": 0.9683860232945092, + "grad_norm": 15.90703010559082, + "learning_rate": 5e-06, + "loss": 1.0381, + "num_input_tokens_seen": 18225688, + "step": 291 + }, + { + "epoch": 0.9683860232945092, + "loss": 0.9319148659706116, + "loss_ce": 0.00027425718144513667, + "loss_iou": 0.296875, + "loss_num": 0.0673828125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 18225688, + "step": 291 + }, + { + "epoch": 0.9717138103161398, + "grad_norm": 16.51078987121582, + "learning_rate": 5e-06, + "loss": 1.0699, + "num_input_tokens_seen": 18288152, + "step": 292 + }, + { + "epoch": 0.9717138103161398, + "loss": 1.2005267143249512, + "loss_ce": 0.0009417659603059292, + "loss_iou": 0.41015625, + "loss_num": 0.07568359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 18288152, + "step": 292 + }, + { + "epoch": 0.9750415973377704, + "grad_norm": 21.68274688720703, + "learning_rate": 5e-06, + "loss": 1.1143, + "num_input_tokens_seen": 18351572, + "step": 293 + }, + { + "epoch": 0.9750415973377704, + "loss": 1.3064453601837158, + "loss_ce": 0.0044434284791350365, + "loss_iou": 0.41796875, + "loss_num": 0.0927734375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 18351572, + "step": 293 + }, + { + "epoch": 0.978369384359401, + "grad_norm": 17.310152053833008, + "learning_rate": 5e-06, + "loss": 1.0826, + "num_input_tokens_seen": 18415056, + "step": 294 + }, + { + "epoch": 0.978369384359401, + "loss": 0.8527212142944336, + "loss_ce": 0.004210504237562418, + "loss_iou": 0.265625, + "loss_num": 0.06396484375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 18415056, + "step": 294 + }, + { + "epoch": 0.9816971713810316, + "grad_norm": 48.573822021484375, + "learning_rate": 5e-06, + "loss": 1.1994, + "num_input_tokens_seen": 18477488, + "step": 295 + }, + { + "epoch": 0.9816971713810316, + "loss": 1.2597250938415527, + "loss_ce": 0.020955566316843033, + "loss_iou": 0.400390625, + "loss_num": 0.08740234375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 18477488, + "step": 295 + }, + { + "epoch": 0.9850249584026622, + "grad_norm": 25.190378189086914, + "learning_rate": 5e-06, + "loss": 1.2747, + "num_input_tokens_seen": 18541468, + "step": 296 + }, + { + "epoch": 0.9850249584026622, + "loss": 1.3697137832641602, + "loss_ce": 0.002770456252619624, + "loss_iou": 0.4140625, + "loss_num": 0.107421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 18541468, + "step": 296 + }, + { + "epoch": 0.9883527454242929, + "grad_norm": 13.586545944213867, + "learning_rate": 5e-06, + "loss": 1.2876, + "num_input_tokens_seen": 18604076, + "step": 297 + }, + { + "epoch": 0.9883527454242929, + "loss": 1.3679777383804321, + "loss_ce": 0.004940649960190058, + "loss_iou": 0.435546875, + "loss_num": 0.0986328125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 18604076, + "step": 297 + }, + { + "epoch": 0.9916805324459235, + "grad_norm": 20.70176887512207, + "learning_rate": 5e-06, + "loss": 0.8859, + "num_input_tokens_seen": 18667228, + "step": 298 + }, + { + "epoch": 0.9916805324459235, + "loss": 0.9582436084747314, + "loss_ce": 0.0007241345010697842, + "loss_iou": 0.279296875, + "loss_num": 0.07958984375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 18667228, + "step": 298 + }, + { + "epoch": 0.9950083194675541, + "grad_norm": 11.641081809997559, + "learning_rate": 5e-06, + "loss": 1.1695, + "num_input_tokens_seen": 18730164, + "step": 299 + }, + { + "epoch": 0.9950083194675541, + "loss": 1.2555705308914185, + "loss_ce": 0.029008055105805397, + "loss_iou": 0.3671875, + "loss_num": 0.09814453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 18730164, + "step": 299 + }, + { + "epoch": 0.9983361064891847, + "grad_norm": 8.146730422973633, + "learning_rate": 5e-06, + "loss": 1.0957, + "num_input_tokens_seen": 18795388, + "step": 300 + }, + { + "epoch": 0.9983361064891847, + "loss": 1.0049164295196533, + "loss_ce": 0.002475087298080325, + "loss_iou": 0.337890625, + "loss_num": 0.06494140625, + "loss_xval": 1.0, + "num_input_tokens_seen": 18795388, + "step": 300 + }, + { + "epoch": 0.9983361064891847, + "loss": 1.238393783569336, + "loss_ce": 0.0025539840571582317, + "loss_iou": 0.384765625, + "loss_num": 0.09375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 18827556, + "step": 300 + }, + { + "epoch": 1.0016638935108153, + "grad_norm": 13.772902488708496, + "learning_rate": 5e-06, + "loss": 1.0861, + "num_input_tokens_seen": 18858660, + "step": 301 + }, + { + "epoch": 1.0016638935108153, + "loss": 0.933795690536499, + "loss_ce": 0.000690196524374187, + "loss_iou": 0.26171875, + "loss_num": 0.08154296875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 18858660, + "step": 301 + }, + { + "epoch": 1.004991680532446, + "grad_norm": 92.42483520507812, + "learning_rate": 5e-06, + "loss": 1.1483, + "num_input_tokens_seen": 18921372, + "step": 302 + }, + { + "epoch": 1.004991680532446, + "loss": 1.1891447305679321, + "loss_ce": 0.002133029280230403, + "loss_iou": 0.3671875, + "loss_num": 0.0908203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 18921372, + "step": 302 + }, + { + "epoch": 1.0083194675540765, + "grad_norm": 13.548688888549805, + "learning_rate": 5e-06, + "loss": 0.8687, + "num_input_tokens_seen": 18982324, + "step": 303 + }, + { + "epoch": 1.0083194675540765, + "loss": 0.7147096395492554, + "loss_ce": 0.003283819416537881, + "loss_iou": 0.1640625, + "loss_num": 0.07666015625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 18982324, + "step": 303 + }, + { + "epoch": 1.0116472545757071, + "grad_norm": 16.49581527709961, + "learning_rate": 5e-06, + "loss": 1.1634, + "num_input_tokens_seen": 19044756, + "step": 304 + }, + { + "epoch": 1.0116472545757071, + "loss": 1.4042516946792603, + "loss_ce": 0.0009313594782724977, + "loss_iou": 0.43359375, + "loss_num": 0.107421875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 19044756, + "step": 304 + }, + { + "epoch": 1.0149750415973378, + "grad_norm": 26.810819625854492, + "learning_rate": 5e-06, + "loss": 0.9443, + "num_input_tokens_seen": 19106748, + "step": 305 + }, + { + "epoch": 1.0149750415973378, + "loss": 0.8466845750808716, + "loss_ce": 0.002446266822516918, + "loss_iou": 0.2314453125, + "loss_num": 0.076171875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 19106748, + "step": 305 + }, + { + "epoch": 1.0183028286189684, + "grad_norm": 11.85273551940918, + "learning_rate": 5e-06, + "loss": 1.1287, + "num_input_tokens_seen": 19169128, + "step": 306 + }, + { + "epoch": 1.0183028286189684, + "loss": 1.0434765815734863, + "loss_ce": 0.00026368399267084897, + "loss_iou": 0.388671875, + "loss_num": 0.053466796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 19169128, + "step": 306 + }, + { + "epoch": 1.021630615640599, + "grad_norm": 14.573290824890137, + "learning_rate": 5e-06, + "loss": 1.0402, + "num_input_tokens_seen": 19232736, + "step": 307 + }, + { + "epoch": 1.021630615640599, + "loss": 1.0792515277862549, + "loss_ce": 0.003812062554061413, + "loss_iou": 0.36328125, + "loss_num": 0.0703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 19232736, + "step": 307 + }, + { + "epoch": 1.0249584026622296, + "grad_norm": 16.600948333740234, + "learning_rate": 5e-06, + "loss": 1.3143, + "num_input_tokens_seen": 19294736, + "step": 308 + }, + { + "epoch": 1.0249584026622296, + "loss": 1.2582886219024658, + "loss_ce": 0.008776895701885223, + "loss_iou": 0.39453125, + "loss_num": 0.091796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 19294736, + "step": 308 + }, + { + "epoch": 1.0282861896838602, + "grad_norm": 11.475916862487793, + "learning_rate": 5e-06, + "loss": 0.8611, + "num_input_tokens_seen": 19354768, + "step": 309 + }, + { + "epoch": 1.0282861896838602, + "loss": 0.5999712944030762, + "loss_ce": 0.002803354524075985, + "loss_iou": 0.01251220703125, + "loss_num": 0.11474609375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 19354768, + "step": 309 + }, + { + "epoch": 1.0316139767054908, + "grad_norm": 16.11930274963379, + "learning_rate": 5e-06, + "loss": 1.2539, + "num_input_tokens_seen": 19418164, + "step": 310 + }, + { + "epoch": 1.0316139767054908, + "loss": 1.3940266370773315, + "loss_ce": 0.0014484911225736141, + "loss_iou": 0.416015625, + "loss_num": 0.1123046875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 19418164, + "step": 310 + }, + { + "epoch": 1.0349417637271214, + "grad_norm": 25.261287689208984, + "learning_rate": 5e-06, + "loss": 1.2662, + "num_input_tokens_seen": 19481684, + "step": 311 + }, + { + "epoch": 1.0349417637271214, + "loss": 1.263882040977478, + "loss_ce": 0.009487524628639221, + "loss_iou": 0.4296875, + "loss_num": 0.07958984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 19481684, + "step": 311 + }, + { + "epoch": 1.038269550748752, + "grad_norm": 11.63999080657959, + "learning_rate": 5e-06, + "loss": 1.2871, + "num_input_tokens_seen": 19544500, + "step": 312 + }, + { + "epoch": 1.038269550748752, + "loss": 1.305168628692627, + "loss_ce": 0.0012135988799855113, + "loss_iou": 0.4296875, + "loss_num": 0.0888671875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 19544500, + "step": 312 + }, + { + "epoch": 1.0415973377703827, + "grad_norm": 12.800460815429688, + "learning_rate": 5e-06, + "loss": 1.0853, + "num_input_tokens_seen": 19609040, + "step": 313 + }, + { + "epoch": 1.0415973377703827, + "loss": 1.041963815689087, + "loss_ce": 0.0007040311465971172, + "loss_iou": 0.34765625, + "loss_num": 0.0693359375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 19609040, + "step": 313 + }, + { + "epoch": 1.0449251247920133, + "grad_norm": 54.40293502807617, + "learning_rate": 5e-06, + "loss": 0.9638, + "num_input_tokens_seen": 19671612, + "step": 314 + }, + { + "epoch": 1.0449251247920133, + "loss": 0.9310311079025269, + "loss_ce": 0.004761609248816967, + "loss_iou": 0.3125, + "loss_num": 0.060546875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 19671612, + "step": 314 + }, + { + "epoch": 1.0482529118136439, + "grad_norm": 16.393136978149414, + "learning_rate": 5e-06, + "loss": 1.0914, + "num_input_tokens_seen": 19733824, + "step": 315 + }, + { + "epoch": 1.0482529118136439, + "loss": 1.1248711347579956, + "loss_ce": 0.002312490250915289, + "loss_iou": 0.3984375, + "loss_num": 0.06494140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 19733824, + "step": 315 + }, + { + "epoch": 1.0515806988352745, + "grad_norm": 19.917030334472656, + "learning_rate": 5e-06, + "loss": 0.8806, + "num_input_tokens_seen": 19794408, + "step": 316 + }, + { + "epoch": 1.0515806988352745, + "loss": 0.5999534130096436, + "loss_ce": 0.0005881565157324076, + "loss_iou": 0.0, + "loss_num": 0.1201171875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 19794408, + "step": 316 + }, + { + "epoch": 1.054908485856905, + "grad_norm": 23.361522674560547, + "learning_rate": 5e-06, + "loss": 1.2846, + "num_input_tokens_seen": 19857392, + "step": 317 + }, + { + "epoch": 1.054908485856905, + "loss": 1.1950931549072266, + "loss_ce": 0.0005131502402946353, + "loss_iou": 0.357421875, + "loss_num": 0.09619140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 19857392, + "step": 317 + }, + { + "epoch": 1.0582362728785357, + "grad_norm": 15.505134582519531, + "learning_rate": 5e-06, + "loss": 0.9283, + "num_input_tokens_seen": 19921056, + "step": 318 + }, + { + "epoch": 1.0582362728785357, + "loss": 1.0836975574493408, + "loss_ce": 0.0006896366830915213, + "loss_iou": 0.349609375, + "loss_num": 0.07666015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 19921056, + "step": 318 + }, + { + "epoch": 1.0615640599001663, + "grad_norm": 15.914534568786621, + "learning_rate": 5e-06, + "loss": 0.8054, + "num_input_tokens_seen": 19981520, + "step": 319 + }, + { + "epoch": 1.0615640599001663, + "loss": 0.7295636534690857, + "loss_ce": 0.004862701054662466, + "loss_iou": 0.134765625, + "loss_num": 0.09130859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 19981520, + "step": 319 + }, + { + "epoch": 1.064891846921797, + "grad_norm": 13.378053665161133, + "learning_rate": 5e-06, + "loss": 1.1706, + "num_input_tokens_seen": 20043756, + "step": 320 + }, + { + "epoch": 1.064891846921797, + "loss": 1.3541194200515747, + "loss_ce": 0.0010921121574938297, + "loss_iou": 0.48828125, + "loss_num": 0.0751953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 20043756, + "step": 320 + }, + { + "epoch": 1.0682196339434276, + "grad_norm": 27.88616943359375, + "learning_rate": 5e-06, + "loss": 1.2254, + "num_input_tokens_seen": 20107020, + "step": 321 + }, + { + "epoch": 1.0682196339434276, + "loss": 1.242746114730835, + "loss_ce": 0.0015352306654676795, + "loss_iou": 0.40234375, + "loss_num": 0.08740234375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 20107020, + "step": 321 + }, + { + "epoch": 1.0715474209650582, + "grad_norm": 16.343183517456055, + "learning_rate": 5e-06, + "loss": 1.1786, + "num_input_tokens_seen": 20169684, + "step": 322 + }, + { + "epoch": 1.0715474209650582, + "loss": 1.4232938289642334, + "loss_ce": 0.00923123024404049, + "loss_iou": 0.404296875, + "loss_num": 0.12109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 20169684, + "step": 322 + }, + { + "epoch": 1.0748752079866888, + "grad_norm": 19.044235229492188, + "learning_rate": 5e-06, + "loss": 1.1389, + "num_input_tokens_seen": 20231040, + "step": 323 + }, + { + "epoch": 1.0748752079866888, + "loss": 1.2635955810546875, + "loss_ce": 0.0021209523547440767, + "loss_iou": 0.419921875, + "loss_num": 0.08447265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 20231040, + "step": 323 + }, + { + "epoch": 1.0782029950083194, + "grad_norm": 22.810884475708008, + "learning_rate": 5e-06, + "loss": 1.1854, + "num_input_tokens_seen": 20294960, + "step": 324 + }, + { + "epoch": 1.0782029950083194, + "loss": 1.3016592264175415, + "loss_ce": 0.005028393119573593, + "loss_iou": 0.427734375, + "loss_num": 0.087890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 20294960, + "step": 324 + }, + { + "epoch": 1.08153078202995, + "grad_norm": 26.073562622070312, + "learning_rate": 5e-06, + "loss": 1.0587, + "num_input_tokens_seen": 20358040, + "step": 325 + }, + { + "epoch": 1.08153078202995, + "loss": 1.235098123550415, + "loss_ce": 0.001699719694443047, + "loss_iou": 0.4140625, + "loss_num": 0.08154296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 20358040, + "step": 325 + }, + { + "epoch": 1.0848585690515806, + "grad_norm": 13.904311180114746, + "learning_rate": 5e-06, + "loss": 1.253, + "num_input_tokens_seen": 20421232, + "step": 326 + }, + { + "epoch": 1.0848585690515806, + "loss": 1.2734218835830688, + "loss_ce": 0.001449216390028596, + "loss_iou": 0.408203125, + "loss_num": 0.09130859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 20421232, + "step": 326 + }, + { + "epoch": 1.0881863560732112, + "grad_norm": 15.164936065673828, + "learning_rate": 5e-06, + "loss": 1.1673, + "num_input_tokens_seen": 20483820, + "step": 327 + }, + { + "epoch": 1.0881863560732112, + "loss": 1.1625075340270996, + "loss_ce": 0.0007643033168278635, + "loss_iou": 0.390625, + "loss_num": 0.076171875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 20483820, + "step": 327 + }, + { + "epoch": 1.0915141430948418, + "grad_norm": 24.25714874267578, + "learning_rate": 5e-06, + "loss": 1.2235, + "num_input_tokens_seen": 20547208, + "step": 328 + }, + { + "epoch": 1.0915141430948418, + "loss": 1.3560025691986084, + "loss_ce": 0.0029753113631159067, + "loss_iou": 0.447265625, + "loss_num": 0.09130859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 20547208, + "step": 328 + }, + { + "epoch": 1.0948419301164725, + "grad_norm": 27.510494232177734, + "learning_rate": 5e-06, + "loss": 0.8721, + "num_input_tokens_seen": 20610188, + "step": 329 + }, + { + "epoch": 1.0948419301164725, + "loss": 0.7758044600486755, + "loss_ce": 0.00029176787938922644, + "loss_iou": 0.27734375, + "loss_num": 0.043701171875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 20610188, + "step": 329 + }, + { + "epoch": 1.098169717138103, + "grad_norm": 162.73048400878906, + "learning_rate": 5e-06, + "loss": 1.1872, + "num_input_tokens_seen": 20674032, + "step": 330 + }, + { + "epoch": 1.098169717138103, + "loss": 0.9909277558326721, + "loss_ce": 0.0016699021216481924, + "loss_iou": 0.357421875, + "loss_num": 0.0546875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 20674032, + "step": 330 + }, + { + "epoch": 1.1014975041597337, + "grad_norm": 10.696892738342285, + "learning_rate": 5e-06, + "loss": 1.1888, + "num_input_tokens_seen": 20735628, + "step": 331 + }, + { + "epoch": 1.1014975041597337, + "loss": 1.2723336219787598, + "loss_ce": 0.0003609945997595787, + "loss_iou": 0.396484375, + "loss_num": 0.095703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 20735628, + "step": 331 + }, + { + "epoch": 1.1048252911813643, + "grad_norm": 51.85942459106445, + "learning_rate": 5e-06, + "loss": 1.2714, + "num_input_tokens_seen": 20799440, + "step": 332 + }, + { + "epoch": 1.1048252911813643, + "loss": 1.278044581413269, + "loss_ce": 0.001189130125567317, + "loss_iou": 0.388671875, + "loss_num": 0.10009765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 20799440, + "step": 332 + }, + { + "epoch": 1.108153078202995, + "grad_norm": 10.22545051574707, + "learning_rate": 5e-06, + "loss": 0.7951, + "num_input_tokens_seen": 20861468, + "step": 333 + }, + { + "epoch": 1.108153078202995, + "loss": 1.0232528448104858, + "loss_ce": 0.000547763193026185, + "loss_iou": 0.310546875, + "loss_num": 0.08056640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 20861468, + "step": 333 + }, + { + "epoch": 1.1114808652246255, + "grad_norm": 8.053681373596191, + "learning_rate": 5e-06, + "loss": 1.1034, + "num_input_tokens_seen": 20923164, + "step": 334 + }, + { + "epoch": 1.1114808652246255, + "loss": 1.012281060218811, + "loss_ce": 0.0009285883279517293, + "loss_iou": 0.302734375, + "loss_num": 0.08154296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 20923164, + "step": 334 + }, + { + "epoch": 1.1148086522462561, + "grad_norm": 15.463237762451172, + "learning_rate": 5e-06, + "loss": 1.1601, + "num_input_tokens_seen": 20987752, + "step": 335 + }, + { + "epoch": 1.1148086522462561, + "loss": 0.9524677395820618, + "loss_ce": 0.000807545380666852, + "loss_iou": 0.3203125, + "loss_num": 0.062255859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 20987752, + "step": 335 + }, + { + "epoch": 1.1181364392678868, + "grad_norm": 10.856858253479004, + "learning_rate": 5e-06, + "loss": 1.1705, + "num_input_tokens_seen": 21050632, + "step": 336 + }, + { + "epoch": 1.1181364392678868, + "loss": 1.1689019203186035, + "loss_ce": 0.003130426863208413, + "loss_iou": 0.396484375, + "loss_num": 0.07421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 21050632, + "step": 336 + }, + { + "epoch": 1.1214642262895174, + "grad_norm": 13.99282455444336, + "learning_rate": 5e-06, + "loss": 1.4048, + "num_input_tokens_seen": 21112928, + "step": 337 + }, + { + "epoch": 1.1214642262895174, + "loss": 1.4012787342071533, + "loss_ce": 0.0016206144355237484, + "loss_iou": 0.423828125, + "loss_num": 0.11083984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 21112928, + "step": 337 + }, + { + "epoch": 1.124792013311148, + "grad_norm": 16.16678237915039, + "learning_rate": 5e-06, + "loss": 1.147, + "num_input_tokens_seen": 21176408, + "step": 338 + }, + { + "epoch": 1.124792013311148, + "loss": 0.9964649081230164, + "loss_ce": 0.00281256134621799, + "loss_iou": 0.26171875, + "loss_num": 0.09423828125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 21176408, + "step": 338 + }, + { + "epoch": 1.1281198003327786, + "grad_norm": 16.422903060913086, + "learning_rate": 5e-06, + "loss": 1.1554, + "num_input_tokens_seen": 21238804, + "step": 339 + }, + { + "epoch": 1.1281198003327786, + "loss": 1.3646725416183472, + "loss_ce": 0.006762400269508362, + "loss_iou": 0.345703125, + "loss_num": 0.1337890625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 21238804, + "step": 339 + }, + { + "epoch": 1.1314475873544092, + "grad_norm": 10.378382682800293, + "learning_rate": 5e-06, + "loss": 1.0336, + "num_input_tokens_seen": 21300804, + "step": 340 + }, + { + "epoch": 1.1314475873544092, + "loss": 1.2877411842346191, + "loss_ce": 0.03554388880729675, + "loss_iou": 0.41015625, + "loss_num": 0.0859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 21300804, + "step": 340 + }, + { + "epoch": 1.1347753743760398, + "grad_norm": 18.60308074951172, + "learning_rate": 5e-06, + "loss": 1.0494, + "num_input_tokens_seen": 21364188, + "step": 341 + }, + { + "epoch": 1.1347753743760398, + "loss": 1.0969760417938232, + "loss_ce": 0.0027377367950975895, + "loss_iou": 0.39453125, + "loss_num": 0.06103515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 21364188, + "step": 341 + }, + { + "epoch": 1.1381031613976704, + "grad_norm": 20.462738037109375, + "learning_rate": 5e-06, + "loss": 1.1857, + "num_input_tokens_seen": 21426420, + "step": 342 + }, + { + "epoch": 1.1381031613976704, + "loss": 1.0656057596206665, + "loss_ce": 0.0026175249367952347, + "loss_iou": 0.3203125, + "loss_num": 0.08447265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 21426420, + "step": 342 + }, + { + "epoch": 1.1414309484193013, + "grad_norm": 21.535289764404297, + "learning_rate": 5e-06, + "loss": 1.1397, + "num_input_tokens_seen": 21489396, + "step": 343 + }, + { + "epoch": 1.1414309484193013, + "loss": 1.322596549987793, + "loss_ce": 0.0036267684772610664, + "loss_iou": 0.421875, + "loss_num": 0.09521484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 21489396, + "step": 343 + }, + { + "epoch": 1.1447587354409319, + "grad_norm": 17.741491317749023, + "learning_rate": 5e-06, + "loss": 1.0668, + "num_input_tokens_seen": 21553004, + "step": 344 + }, + { + "epoch": 1.1447587354409319, + "loss": 1.1139004230499268, + "loss_ce": 0.0011073811911046505, + "loss_iou": 0.36328125, + "loss_num": 0.07763671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 21553004, + "step": 344 + }, + { + "epoch": 1.1480865224625625, + "grad_norm": 16.684860229492188, + "learning_rate": 5e-06, + "loss": 1.2185, + "num_input_tokens_seen": 21616504, + "step": 345 + }, + { + "epoch": 1.1480865224625625, + "loss": 1.2288806438446045, + "loss_ce": 0.002318133134394884, + "loss_iou": 0.39453125, + "loss_num": 0.087890625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 21616504, + "step": 345 + }, + { + "epoch": 1.151414309484193, + "grad_norm": 13.319676399230957, + "learning_rate": 5e-06, + "loss": 0.9034, + "num_input_tokens_seen": 21679532, + "step": 346 + }, + { + "epoch": 1.151414309484193, + "loss": 0.8316326141357422, + "loss_ce": 0.002897227182984352, + "loss_iou": 0.267578125, + "loss_num": 0.058349609375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 21679532, + "step": 346 + }, + { + "epoch": 1.1547420965058237, + "grad_norm": 11.575480461120605, + "learning_rate": 5e-06, + "loss": 1.139, + "num_input_tokens_seen": 21742168, + "step": 347 + }, + { + "epoch": 1.1547420965058237, + "loss": 0.9771150946617126, + "loss_ce": 0.0007967862184159458, + "loss_iou": 0.314453125, + "loss_num": 0.0693359375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 21742168, + "step": 347 + }, + { + "epoch": 1.1580698835274543, + "grad_norm": 10.588814735412598, + "learning_rate": 5e-06, + "loss": 0.9637, + "num_input_tokens_seen": 21803688, + "step": 348 + }, + { + "epoch": 1.1580698835274543, + "loss": 0.6981990933418274, + "loss_ce": 0.00020107255841139704, + "loss_iou": 0.1669921875, + "loss_num": 0.07275390625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 21803688, + "step": 348 + }, + { + "epoch": 1.161397670549085, + "grad_norm": 13.000744819641113, + "learning_rate": 5e-06, + "loss": 0.7941, + "num_input_tokens_seen": 21867184, + "step": 349 + }, + { + "epoch": 1.161397670549085, + "loss": 1.015153169631958, + "loss_ce": 0.0019696177914738655, + "loss_iou": 0.27734375, + "loss_num": 0.09130859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 21867184, + "step": 349 + }, + { + "epoch": 1.1647254575707155, + "grad_norm": 8.500694274902344, + "learning_rate": 5e-06, + "loss": 0.9189, + "num_input_tokens_seen": 21930068, + "step": 350 + }, + { + "epoch": 1.1647254575707155, + "loss": 0.8839759826660156, + "loss_ce": 0.004825552459806204, + "loss_iou": 0.296875, + "loss_num": 0.05712890625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 21930068, + "step": 350 + }, + { + "epoch": 1.1680532445923462, + "grad_norm": 22.050308227539062, + "learning_rate": 5e-06, + "loss": 0.85, + "num_input_tokens_seen": 21992112, + "step": 351 + }, + { + "epoch": 1.1680532445923462, + "loss": 0.871414065361023, + "loss_ce": 0.005691413767635822, + "loss_iou": 0.17578125, + "loss_num": 0.10302734375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 21992112, + "step": 351 + }, + { + "epoch": 1.1713810316139768, + "grad_norm": 15.947305679321289, + "learning_rate": 5e-06, + "loss": 0.9005, + "num_input_tokens_seen": 22055200, + "step": 352 + }, + { + "epoch": 1.1713810316139768, + "loss": 0.8017796277999878, + "loss_ce": 0.00209699384868145, + "loss_iou": 0.232421875, + "loss_num": 0.06689453125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 22055200, + "step": 352 + }, + { + "epoch": 1.1747088186356074, + "grad_norm": 15.728245735168457, + "learning_rate": 5e-06, + "loss": 0.9721, + "num_input_tokens_seen": 22117372, + "step": 353 + }, + { + "epoch": 1.1747088186356074, + "loss": 0.8560371398925781, + "loss_ce": 0.026935596019029617, + "loss_iou": 0.2470703125, + "loss_num": 0.0673828125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 22117372, + "step": 353 + }, + { + "epoch": 1.178036605657238, + "grad_norm": 9.21013355255127, + "learning_rate": 5e-06, + "loss": 1.0148, + "num_input_tokens_seen": 22180280, + "step": 354 + }, + { + "epoch": 1.178036605657238, + "loss": 1.0702283382415771, + "loss_ce": 0.0018690190045163035, + "loss_iou": 0.337890625, + "loss_num": 0.0791015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 22180280, + "step": 354 + }, + { + "epoch": 1.1813643926788686, + "grad_norm": 13.137001991271973, + "learning_rate": 5e-06, + "loss": 0.9983, + "num_input_tokens_seen": 22243800, + "step": 355 + }, + { + "epoch": 1.1813643926788686, + "loss": 0.9866027235984802, + "loss_ce": 0.002227720571681857, + "loss_iou": 0.34765625, + "loss_num": 0.057861328125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 22243800, + "step": 355 + }, + { + "epoch": 1.1846921797004992, + "grad_norm": 13.438755989074707, + "learning_rate": 5e-06, + "loss": 0.9054, + "num_input_tokens_seen": 22306152, + "step": 356 + }, + { + "epoch": 1.1846921797004992, + "loss": 0.93604576587677, + "loss_ce": 0.0034285818692296743, + "loss_iou": 0.3203125, + "loss_num": 0.05908203125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 22306152, + "step": 356 + }, + { + "epoch": 1.1880199667221298, + "grad_norm": 9.37387466430664, + "learning_rate": 5e-06, + "loss": 0.9098, + "num_input_tokens_seen": 22368308, + "step": 357 + }, + { + "epoch": 1.1880199667221298, + "loss": 0.8839547038078308, + "loss_ce": 0.0011421950766816735, + "loss_iou": 0.22265625, + "loss_num": 0.08740234375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 22368308, + "step": 357 + }, + { + "epoch": 1.1913477537437605, + "grad_norm": 15.862778663635254, + "learning_rate": 5e-06, + "loss": 0.9651, + "num_input_tokens_seen": 22430760, + "step": 358 + }, + { + "epoch": 1.1913477537437605, + "loss": 0.8542115688323975, + "loss_ce": 0.0004518293426372111, + "loss_iou": 0.2734375, + "loss_num": 0.061767578125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 22430760, + "step": 358 + }, + { + "epoch": 1.194675540765391, + "grad_norm": 13.395066261291504, + "learning_rate": 5e-06, + "loss": 1.2366, + "num_input_tokens_seen": 22494188, + "step": 359 + }, + { + "epoch": 1.194675540765391, + "loss": 1.1658010482788086, + "loss_ce": 0.0014943606220185757, + "loss_iou": 0.37890625, + "loss_num": 0.08154296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 22494188, + "step": 359 + }, + { + "epoch": 1.1980033277870217, + "grad_norm": 17.724477767944336, + "learning_rate": 5e-06, + "loss": 1.1109, + "num_input_tokens_seen": 22557396, + "step": 360 + }, + { + "epoch": 1.1980033277870217, + "loss": 0.9321660995483398, + "loss_ce": 0.0002813843311741948, + "loss_iou": 0.259765625, + "loss_num": 0.08251953125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 22557396, + "step": 360 + }, + { + "epoch": 1.2013311148086523, + "grad_norm": 15.220552444458008, + "learning_rate": 5e-06, + "loss": 1.3183, + "num_input_tokens_seen": 22619512, + "step": 361 + }, + { + "epoch": 1.2013311148086523, + "loss": 1.3823212385177612, + "loss_ce": 0.002438361756503582, + "loss_iou": 0.48828125, + "loss_num": 0.08056640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 22619512, + "step": 361 + }, + { + "epoch": 1.204658901830283, + "grad_norm": 13.84691047668457, + "learning_rate": 5e-06, + "loss": 0.9889, + "num_input_tokens_seen": 22683192, + "step": 362 + }, + { + "epoch": 1.204658901830283, + "loss": 1.0336413383483887, + "loss_ce": 0.005320955533534288, + "loss_iou": 0.326171875, + "loss_num": 0.07568359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 22683192, + "step": 362 + }, + { + "epoch": 1.2079866888519135, + "grad_norm": 8.67003345489502, + "learning_rate": 5e-06, + "loss": 1.0009, + "num_input_tokens_seen": 22746804, + "step": 363 + }, + { + "epoch": 1.2079866888519135, + "loss": 0.8072459697723389, + "loss_ce": 0.001337698893621564, + "loss_iou": 0.2265625, + "loss_num": 0.0703125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 22746804, + "step": 363 + }, + { + "epoch": 1.2113144758735441, + "grad_norm": 19.87843894958496, + "learning_rate": 5e-06, + "loss": 1.1418, + "num_input_tokens_seen": 22809236, + "step": 364 + }, + { + "epoch": 1.2113144758735441, + "loss": 1.1470714807510376, + "loss_ce": 0.001075398176908493, + "loss_iou": 0.36328125, + "loss_num": 0.083984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 22809236, + "step": 364 + }, + { + "epoch": 1.2146422628951747, + "grad_norm": 19.612537384033203, + "learning_rate": 5e-06, + "loss": 1.0437, + "num_input_tokens_seen": 22871224, + "step": 365 + }, + { + "epoch": 1.2146422628951747, + "loss": 1.0723521709442139, + "loss_ce": 0.0015513792168349028, + "loss_iou": 0.37109375, + "loss_num": 0.06591796875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 22871224, + "step": 365 + }, + { + "epoch": 1.2179700499168054, + "grad_norm": 13.617435455322266, + "learning_rate": 5e-06, + "loss": 1.1042, + "num_input_tokens_seen": 22934344, + "step": 366 + }, + { + "epoch": 1.2179700499168054, + "loss": 1.1154537200927734, + "loss_ce": 0.0016841854667291045, + "loss_iou": 0.3671875, + "loss_num": 0.07568359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 22934344, + "step": 366 + }, + { + "epoch": 1.221297836938436, + "grad_norm": 8.669763565063477, + "learning_rate": 5e-06, + "loss": 0.6066, + "num_input_tokens_seen": 22996164, + "step": 367 + }, + { + "epoch": 1.221297836938436, + "loss": 0.5188974142074585, + "loss_ce": 0.005957929417490959, + "loss_iou": 0.1201171875, + "loss_num": 0.0546875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 22996164, + "step": 367 + }, + { + "epoch": 1.2246256239600666, + "grad_norm": 12.283041000366211, + "learning_rate": 5e-06, + "loss": 0.9514, + "num_input_tokens_seen": 23058328, + "step": 368 + }, + { + "epoch": 1.2246256239600666, + "loss": 1.0867336988449097, + "loss_ce": 0.0010403767228126526, + "loss_iou": 0.3046875, + "loss_num": 0.09521484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 23058328, + "step": 368 + }, + { + "epoch": 1.2279534109816972, + "grad_norm": 25.49207305908203, + "learning_rate": 5e-06, + "loss": 1.1151, + "num_input_tokens_seen": 23120736, + "step": 369 + }, + { + "epoch": 1.2279534109816972, + "loss": 1.349541425704956, + "loss_ce": 0.0018851247150450945, + "loss_iou": 0.455078125, + "loss_num": 0.08740234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 23120736, + "step": 369 + }, + { + "epoch": 1.2312811980033278, + "grad_norm": 20.782428741455078, + "learning_rate": 5e-06, + "loss": 1.2506, + "num_input_tokens_seen": 23182684, + "step": 370 + }, + { + "epoch": 1.2312811980033278, + "loss": 1.2768054008483887, + "loss_ce": 0.00043815511162392795, + "loss_iou": 0.37890625, + "loss_num": 0.103515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 23182684, + "step": 370 + }, + { + "epoch": 1.2346089850249584, + "grad_norm": 12.254620552062988, + "learning_rate": 5e-06, + "loss": 1.2855, + "num_input_tokens_seen": 23244864, + "step": 371 + }, + { + "epoch": 1.2346089850249584, + "loss": 1.1182758808135986, + "loss_ce": 0.002309112809598446, + "loss_iou": 0.349609375, + "loss_num": 0.08349609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 23244864, + "step": 371 + }, + { + "epoch": 1.237936772046589, + "grad_norm": 20.859880447387695, + "learning_rate": 5e-06, + "loss": 1.0792, + "num_input_tokens_seen": 23308204, + "step": 372 + }, + { + "epoch": 1.237936772046589, + "loss": 1.1883625984191895, + "loss_ce": 0.00037434539990499616, + "loss_iou": 0.365234375, + "loss_num": 0.091796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 23308204, + "step": 372 + }, + { + "epoch": 1.2412645590682196, + "grad_norm": 10.013941764831543, + "learning_rate": 5e-06, + "loss": 0.9596, + "num_input_tokens_seen": 23371756, + "step": 373 + }, + { + "epoch": 1.2412645590682196, + "loss": 1.0599653720855713, + "loss_ce": 0.0008833592291921377, + "loss_iou": 0.35546875, + "loss_num": 0.06982421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 23371756, + "step": 373 + }, + { + "epoch": 1.2445923460898503, + "grad_norm": 15.680611610412598, + "learning_rate": 5e-06, + "loss": 1.147, + "num_input_tokens_seen": 23434380, + "step": 374 + }, + { + "epoch": 1.2445923460898503, + "loss": 1.3000984191894531, + "loss_ce": 0.0020027763675898314, + "loss_iou": 0.349609375, + "loss_num": 0.11962890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 23434380, + "step": 374 + }, + { + "epoch": 1.2479201331114809, + "grad_norm": 55.765045166015625, + "learning_rate": 5e-06, + "loss": 0.9942, + "num_input_tokens_seen": 23496936, + "step": 375 + }, + { + "epoch": 1.2479201331114809, + "loss": 0.8713576793670654, + "loss_ce": 0.0001418392639607191, + "loss_iou": 0.318359375, + "loss_num": 0.046630859375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 23496936, + "step": 375 + }, + { + "epoch": 1.2512479201331115, + "grad_norm": 11.986567497253418, + "learning_rate": 5e-06, + "loss": 1.1553, + "num_input_tokens_seen": 23561228, + "step": 376 + }, + { + "epoch": 1.2512479201331115, + "loss": 1.2353582382202148, + "loss_ce": 0.008063295856118202, + "loss_iou": 0.41796875, + "loss_num": 0.078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 23561228, + "step": 376 + }, + { + "epoch": 1.254575707154742, + "grad_norm": 15.518547058105469, + "learning_rate": 5e-06, + "loss": 1.1645, + "num_input_tokens_seen": 23623508, + "step": 377 + }, + { + "epoch": 1.254575707154742, + "loss": 1.1245479583740234, + "loss_ce": 0.011266733519732952, + "loss_iou": 0.416015625, + "loss_num": 0.056396484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 23623508, + "step": 377 + }, + { + "epoch": 1.2579034941763727, + "grad_norm": 12.487281799316406, + "learning_rate": 5e-06, + "loss": 1.1126, + "num_input_tokens_seen": 23686012, + "step": 378 + }, + { + "epoch": 1.2579034941763727, + "loss": 1.173827052116394, + "loss_ce": 0.0009754931088536978, + "loss_iou": 0.3359375, + "loss_num": 0.1005859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 23686012, + "step": 378 + }, + { + "epoch": 1.2612312811980033, + "grad_norm": 14.35226821899414, + "learning_rate": 5e-06, + "loss": 1.0547, + "num_input_tokens_seen": 23749124, + "step": 379 + }, + { + "epoch": 1.2612312811980033, + "loss": 1.0610747337341309, + "loss_ce": 0.0007720579742453992, + "loss_iou": 0.361328125, + "loss_num": 0.0673828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 23749124, + "step": 379 + }, + { + "epoch": 1.264559068219634, + "grad_norm": 24.53921890258789, + "learning_rate": 5e-06, + "loss": 1.2042, + "num_input_tokens_seen": 23811144, + "step": 380 + }, + { + "epoch": 1.264559068219634, + "loss": 1.225796103477478, + "loss_ce": 0.0036280876956880093, + "loss_iou": 0.388671875, + "loss_num": 0.08984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 23811144, + "step": 380 + }, + { + "epoch": 1.2678868552412645, + "grad_norm": 13.439510345458984, + "learning_rate": 5e-06, + "loss": 0.9866, + "num_input_tokens_seen": 23874480, + "step": 381 + }, + { + "epoch": 1.2678868552412645, + "loss": 0.8618694543838501, + "loss_ce": 0.0004192190826870501, + "loss_iou": 0.283203125, + "loss_num": 0.05859375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 23874480, + "step": 381 + }, + { + "epoch": 1.2712146422628952, + "grad_norm": 16.41143035888672, + "learning_rate": 5e-06, + "loss": 0.9888, + "num_input_tokens_seen": 23936680, + "step": 382 + }, + { + "epoch": 1.2712146422628952, + "loss": 0.9557693004608154, + "loss_ce": 0.00514674698933959, + "loss_iou": 0.2490234375, + "loss_num": 0.09033203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 23936680, + "step": 382 + }, + { + "epoch": 1.2745424292845258, + "grad_norm": 20.497072219848633, + "learning_rate": 5e-06, + "loss": 0.873, + "num_input_tokens_seen": 23997596, + "step": 383 + }, + { + "epoch": 1.2745424292845258, + "loss": 0.9353244304656982, + "loss_ce": 0.0036837609950453043, + "loss_iou": 0.2890625, + "loss_num": 0.07080078125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 23997596, + "step": 383 + }, + { + "epoch": 1.2778702163061564, + "grad_norm": 16.8009033203125, + "learning_rate": 5e-06, + "loss": 0.836, + "num_input_tokens_seen": 24059808, + "step": 384 + }, + { + "epoch": 1.2778702163061564, + "loss": 0.957974910736084, + "loss_ce": 0.0016761153237894177, + "loss_iou": 0.306640625, + "loss_num": 0.068359375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 24059808, + "step": 384 + }, + { + "epoch": 1.281198003327787, + "grad_norm": 16.275964736938477, + "learning_rate": 5e-06, + "loss": 0.9754, + "num_input_tokens_seen": 24123296, + "step": 385 + }, + { + "epoch": 1.281198003327787, + "loss": 1.200371503829956, + "loss_ce": 0.0004203752614557743, + "loss_iou": 0.37109375, + "loss_num": 0.09130859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 24123296, + "step": 385 + }, + { + "epoch": 1.2845257903494176, + "grad_norm": 15.432588577270508, + "learning_rate": 5e-06, + "loss": 1.1704, + "num_input_tokens_seen": 24186340, + "step": 386 + }, + { + "epoch": 1.2845257903494176, + "loss": 1.2510305643081665, + "loss_ce": 0.006401653401553631, + "loss_iou": 0.453125, + "loss_num": 0.068359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 24186340, + "step": 386 + }, + { + "epoch": 1.2878535773710482, + "grad_norm": 12.922508239746094, + "learning_rate": 5e-06, + "loss": 1.2301, + "num_input_tokens_seen": 24250244, + "step": 387 + }, + { + "epoch": 1.2878535773710482, + "loss": 1.315189242362976, + "loss_ce": 0.0007360831368714571, + "loss_iou": 0.486328125, + "loss_num": 0.068359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 24250244, + "step": 387 + }, + { + "epoch": 1.2911813643926788, + "grad_norm": 16.12157440185547, + "learning_rate": 5e-06, + "loss": 1.0807, + "num_input_tokens_seen": 24312700, + "step": 388 + }, + { + "epoch": 1.2911813643926788, + "loss": 0.9249197840690613, + "loss_ce": 0.00011512526543810964, + "loss_iou": 0.29296875, + "loss_num": 0.0673828125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 24312700, + "step": 388 + }, + { + "epoch": 1.2945091514143094, + "grad_norm": 15.61776351928711, + "learning_rate": 5e-06, + "loss": 1.206, + "num_input_tokens_seen": 24372944, + "step": 389 + }, + { + "epoch": 1.2945091514143094, + "loss": 1.2747079133987427, + "loss_ce": 0.00664151506498456, + "loss_iou": 0.291015625, + "loss_num": 0.1376953125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 24372944, + "step": 389 + }, + { + "epoch": 1.29783693843594, + "grad_norm": 10.26531982421875, + "learning_rate": 5e-06, + "loss": 1.1097, + "num_input_tokens_seen": 24436248, + "step": 390 + }, + { + "epoch": 1.29783693843594, + "loss": 1.227935791015625, + "loss_ce": 0.0003966076474171132, + "loss_iou": 0.416015625, + "loss_num": 0.0791015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 24436248, + "step": 390 + }, + { + "epoch": 1.3011647254575707, + "grad_norm": 10.401841163635254, + "learning_rate": 5e-06, + "loss": 1.0097, + "num_input_tokens_seen": 24498940, + "step": 391 + }, + { + "epoch": 1.3011647254575707, + "loss": 1.0491610765457153, + "loss_ce": 0.004727460443973541, + "loss_iou": 0.314453125, + "loss_num": 0.08349609375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 24498940, + "step": 391 + }, + { + "epoch": 1.3044925124792013, + "grad_norm": 17.991519927978516, + "learning_rate": 5e-06, + "loss": 1.1516, + "num_input_tokens_seen": 24563372, + "step": 392 + }, + { + "epoch": 1.3044925124792013, + "loss": 1.063659906387329, + "loss_ce": 0.0001832506968639791, + "loss_iou": 0.357421875, + "loss_num": 0.0693359375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 24563372, + "step": 392 + }, + { + "epoch": 1.307820299500832, + "grad_norm": 16.1484375, + "learning_rate": 5e-06, + "loss": 0.8481, + "num_input_tokens_seen": 24625780, + "step": 393 + }, + { + "epoch": 1.307820299500832, + "loss": 0.6541198492050171, + "loss_ce": 6.708937871735543e-05, + "loss_iou": 0.279296875, + "loss_num": 0.018798828125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 24625780, + "step": 393 + }, + { + "epoch": 1.3111480865224625, + "grad_norm": 15.9221773147583, + "learning_rate": 5e-06, + "loss": 0.9679, + "num_input_tokens_seen": 24687492, + "step": 394 + }, + { + "epoch": 1.3111480865224625, + "loss": 1.267278790473938, + "loss_ce": 0.0021421266719698906, + "loss_iou": 0.40625, + "loss_num": 0.09033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 24687492, + "step": 394 + }, + { + "epoch": 1.3144758735440931, + "grad_norm": 23.325969696044922, + "learning_rate": 5e-06, + "loss": 1.177, + "num_input_tokens_seen": 24752340, + "step": 395 + }, + { + "epoch": 1.3144758735440931, + "loss": 1.2171975374221802, + "loss_ce": 0.001377263804897666, + "loss_iou": 0.431640625, + "loss_num": 0.07080078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 24752340, + "step": 395 + }, + { + "epoch": 1.3178036605657237, + "grad_norm": 43.90242004394531, + "learning_rate": 5e-06, + "loss": 1.1792, + "num_input_tokens_seen": 24814652, + "step": 396 + }, + { + "epoch": 1.3178036605657237, + "loss": 1.2642714977264404, + "loss_ce": 0.0020644143223762512, + "loss_iou": 0.416015625, + "loss_num": 0.08642578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 24814652, + "step": 396 + }, + { + "epoch": 1.3211314475873543, + "grad_norm": 45.1225700378418, + "learning_rate": 5e-06, + "loss": 1.1999, + "num_input_tokens_seen": 24878016, + "step": 397 + }, + { + "epoch": 1.3211314475873543, + "loss": 1.1546871662139893, + "loss_ce": 0.003564041806384921, + "loss_iou": 0.388671875, + "loss_num": 0.0751953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 24878016, + "step": 397 + }, + { + "epoch": 1.324459234608985, + "grad_norm": 19.703289031982422, + "learning_rate": 5e-06, + "loss": 1.1245, + "num_input_tokens_seen": 24939340, + "step": 398 + }, + { + "epoch": 1.324459234608985, + "loss": 0.9508374333381653, + "loss_ce": 0.000153833330841735, + "loss_iou": 0.3203125, + "loss_num": 0.06201171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 24939340, + "step": 398 + }, + { + "epoch": 1.3277870216306156, + "grad_norm": 11.191904067993164, + "learning_rate": 5e-06, + "loss": 0.963, + "num_input_tokens_seen": 25002132, + "step": 399 + }, + { + "epoch": 1.3277870216306156, + "loss": 0.7988940477371216, + "loss_ce": 0.006901869084686041, + "loss_iou": 0.265625, + "loss_num": 0.05224609375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 25002132, + "step": 399 + }, + { + "epoch": 1.3311148086522462, + "grad_norm": 10.622575759887695, + "learning_rate": 5e-06, + "loss": 1.1994, + "num_input_tokens_seen": 25066044, + "step": 400 + }, + { + "epoch": 1.3311148086522462, + "loss": 1.0515964031219482, + "loss_ce": 0.0037447987124323845, + "loss_iou": 0.353515625, + "loss_num": 0.068359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 25066044, + "step": 400 + }, + { + "epoch": 1.3344425956738768, + "grad_norm": 24.427770614624023, + "learning_rate": 5e-06, + "loss": 0.9622, + "num_input_tokens_seen": 25128340, + "step": 401 + }, + { + "epoch": 1.3344425956738768, + "loss": 0.9643534421920776, + "loss_ce": 0.002195254433900118, + "loss_iou": 0.216796875, + "loss_num": 0.10595703125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 25128340, + "step": 401 + }, + { + "epoch": 1.3377703826955074, + "grad_norm": 10.355712890625, + "learning_rate": 5e-06, + "loss": 1.0325, + "num_input_tokens_seen": 25192192, + "step": 402 + }, + { + "epoch": 1.3377703826955074, + "loss": 1.0074443817138672, + "loss_ce": 0.001585077028721571, + "loss_iou": 0.33203125, + "loss_num": 0.06884765625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 25192192, + "step": 402 + }, + { + "epoch": 1.341098169717138, + "grad_norm": 21.185884475708008, + "learning_rate": 5e-06, + "loss": 1.0896, + "num_input_tokens_seen": 25255636, + "step": 403 + }, + { + "epoch": 1.341098169717138, + "loss": 0.9336959719657898, + "loss_ce": 0.00144498934969306, + "loss_iou": 0.345703125, + "loss_num": 0.04833984375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 25255636, + "step": 403 + }, + { + "epoch": 1.3444259567387689, + "grad_norm": 9.39505386352539, + "learning_rate": 5e-06, + "loss": 1.0355, + "num_input_tokens_seen": 25317992, + "step": 404 + }, + { + "epoch": 1.3444259567387689, + "loss": 1.0250355005264282, + "loss_ce": 0.00013315524847712368, + "loss_iou": 0.353515625, + "loss_num": 0.0634765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 25317992, + "step": 404 + }, + { + "epoch": 1.3477537437603995, + "grad_norm": 20.77029037475586, + "learning_rate": 5e-06, + "loss": 1.1133, + "num_input_tokens_seen": 25380772, + "step": 405 + }, + { + "epoch": 1.3477537437603995, + "loss": 1.0792059898376465, + "loss_ce": 0.011090747080743313, + "loss_iou": 0.296875, + "loss_num": 0.09521484375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 25380772, + "step": 405 + }, + { + "epoch": 1.35108153078203, + "grad_norm": 31.16495132446289, + "learning_rate": 5e-06, + "loss": 1.1511, + "num_input_tokens_seen": 25445244, + "step": 406 + }, + { + "epoch": 1.35108153078203, + "loss": 1.2152575254440308, + "loss_ce": 0.0004138269869145006, + "loss_iou": 0.390625, + "loss_num": 0.08740234375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 25445244, + "step": 406 + }, + { + "epoch": 1.3544093178036607, + "grad_norm": 20.450408935546875, + "learning_rate": 5e-06, + "loss": 0.8806, + "num_input_tokens_seen": 25507188, + "step": 407 + }, + { + "epoch": 1.3544093178036607, + "loss": 0.6218196153640747, + "loss_ce": 0.00023759223404340446, + "loss_iou": 0.15234375, + "loss_num": 0.0634765625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 25507188, + "step": 407 + }, + { + "epoch": 1.3577371048252913, + "grad_norm": 16.568866729736328, + "learning_rate": 5e-06, + "loss": 0.9924, + "num_input_tokens_seen": 25570620, + "step": 408 + }, + { + "epoch": 1.3577371048252913, + "loss": 0.8238029479980469, + "loss_ce": 0.022045187652111053, + "loss_iou": 0.2314453125, + "loss_num": 0.06787109375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 25570620, + "step": 408 + }, + { + "epoch": 1.361064891846922, + "grad_norm": 17.278478622436523, + "learning_rate": 5e-06, + "loss": 1.0307, + "num_input_tokens_seen": 25633892, + "step": 409 + }, + { + "epoch": 1.361064891846922, + "loss": 1.0489603281021118, + "loss_ce": 0.0023294282145798206, + "loss_iou": 0.32421875, + "loss_num": 0.0791015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 25633892, + "step": 409 + }, + { + "epoch": 1.3643926788685525, + "grad_norm": 12.66992473602295, + "learning_rate": 5e-06, + "loss": 1.148, + "num_input_tokens_seen": 25697340, + "step": 410 + }, + { + "epoch": 1.3643926788685525, + "loss": 1.0088579654693604, + "loss_ce": 0.003456423059105873, + "loss_iou": 0.2890625, + "loss_num": 0.08544921875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 25697340, + "step": 410 + }, + { + "epoch": 1.3677204658901831, + "grad_norm": 6.634986400604248, + "learning_rate": 5e-06, + "loss": 0.8108, + "num_input_tokens_seen": 25758432, + "step": 411 + }, + { + "epoch": 1.3677204658901831, + "loss": 0.9283370971679688, + "loss_ce": 0.00426485575735569, + "loss_iou": 0.267578125, + "loss_num": 0.07763671875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 25758432, + "step": 411 + }, + { + "epoch": 1.3710482529118138, + "grad_norm": 10.549354553222656, + "learning_rate": 5e-06, + "loss": 1.0254, + "num_input_tokens_seen": 25822248, + "step": 412 + }, + { + "epoch": 1.3710482529118138, + "loss": 0.9352825284004211, + "loss_ce": 0.005350937135517597, + "loss_iou": 0.294921875, + "loss_num": 0.068359375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 25822248, + "step": 412 + }, + { + "epoch": 1.3743760399334444, + "grad_norm": 14.273979187011719, + "learning_rate": 5e-06, + "loss": 1.1039, + "num_input_tokens_seen": 25884780, + "step": 413 + }, + { + "epoch": 1.3743760399334444, + "loss": 1.2133036851882935, + "loss_ce": 0.0009013470844365656, + "loss_iou": 0.42578125, + "loss_num": 0.072265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 25884780, + "step": 413 + }, + { + "epoch": 1.377703826955075, + "grad_norm": 14.600478172302246, + "learning_rate": 5e-06, + "loss": 0.9331, + "num_input_tokens_seen": 25946912, + "step": 414 + }, + { + "epoch": 1.377703826955075, + "loss": 0.6742814183235168, + "loss_ce": 0.00045326852705329657, + "loss_iou": 0.1923828125, + "loss_num": 0.057861328125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 25946912, + "step": 414 + }, + { + "epoch": 1.3810316139767056, + "grad_norm": 11.724282264709473, + "learning_rate": 5e-06, + "loss": 1.0044, + "num_input_tokens_seen": 26009668, + "step": 415 + }, + { + "epoch": 1.3810316139767056, + "loss": 1.0532835721969604, + "loss_ce": 6.0868831496918574e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0771484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 26009668, + "step": 415 + }, + { + "epoch": 1.3843594009983362, + "grad_norm": 10.00571346282959, + "learning_rate": 5e-06, + "loss": 0.9009, + "num_input_tokens_seen": 26071008, + "step": 416 + }, + { + "epoch": 1.3843594009983362, + "loss": 1.1453657150268555, + "loss_ce": 0.044779855757951736, + "loss_iou": 0.3125, + "loss_num": 0.09521484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 26071008, + "step": 416 + }, + { + "epoch": 1.3876871880199668, + "grad_norm": 15.58684253692627, + "learning_rate": 5e-06, + "loss": 1.1956, + "num_input_tokens_seen": 26132352, + "step": 417 + }, + { + "epoch": 1.3876871880199668, + "loss": 0.8832725882530212, + "loss_ce": 0.00046005373587831855, + "loss_iou": 0.2373046875, + "loss_num": 0.08154296875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 26132352, + "step": 417 + }, + { + "epoch": 1.3910149750415974, + "grad_norm": 19.847135543823242, + "learning_rate": 5e-06, + "loss": 0.9647, + "num_input_tokens_seen": 26195748, + "step": 418 + }, + { + "epoch": 1.3910149750415974, + "loss": 0.9432509541511536, + "loss_ce": 0.0030654240399599075, + "loss_iou": 0.35546875, + "loss_num": 0.046142578125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 26195748, + "step": 418 + }, + { + "epoch": 1.394342762063228, + "grad_norm": 34.97438049316406, + "learning_rate": 5e-06, + "loss": 1.1494, + "num_input_tokens_seen": 26257164, + "step": 419 + }, + { + "epoch": 1.394342762063228, + "loss": 1.2391083240509033, + "loss_ce": 9.462250454816967e-05, + "loss_iou": 0.41015625, + "loss_num": 0.08349609375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 26257164, + "step": 419 + }, + { + "epoch": 1.3976705490848587, + "grad_norm": 14.589425086975098, + "learning_rate": 5e-06, + "loss": 1.1584, + "num_input_tokens_seen": 26320716, + "step": 420 + }, + { + "epoch": 1.3976705490848587, + "loss": 1.2718900442123413, + "loss_ce": 0.004800152964890003, + "loss_iou": 0.380859375, + "loss_num": 0.10107421875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 26320716, + "step": 420 + }, + { + "epoch": 1.4009983361064893, + "grad_norm": 13.36715316772461, + "learning_rate": 5e-06, + "loss": 1.1376, + "num_input_tokens_seen": 26384520, + "step": 421 + }, + { + "epoch": 1.4009983361064893, + "loss": 0.8977671265602112, + "loss_ce": 0.0003061619936488569, + "loss_iou": 0.31640625, + "loss_num": 0.052734375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 26384520, + "step": 421 + }, + { + "epoch": 1.4043261231281199, + "grad_norm": 23.56789779663086, + "learning_rate": 5e-06, + "loss": 1.0789, + "num_input_tokens_seen": 26447724, + "step": 422 + }, + { + "epoch": 1.4043261231281199, + "loss": 1.1214494705200195, + "loss_ce": 0.003285370534285903, + "loss_iou": 0.3828125, + "loss_num": 0.07080078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 26447724, + "step": 422 + }, + { + "epoch": 1.4076539101497505, + "grad_norm": 17.013761520385742, + "learning_rate": 5e-06, + "loss": 1.0713, + "num_input_tokens_seen": 26510680, + "step": 423 + }, + { + "epoch": 1.4076539101497505, + "loss": 1.0345571041107178, + "loss_ce": 0.00037738040555268526, + "loss_iou": 0.2890625, + "loss_num": 0.09130859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 26510680, + "step": 423 + }, + { + "epoch": 1.410981697171381, + "grad_norm": 41.10530471801758, + "learning_rate": 5e-06, + "loss": 1.1384, + "num_input_tokens_seen": 26574016, + "step": 424 + }, + { + "epoch": 1.410981697171381, + "loss": 1.054764747619629, + "loss_ce": 7.73034553276375e-05, + "loss_iou": 0.3359375, + "loss_num": 0.07666015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 26574016, + "step": 424 + }, + { + "epoch": 1.4143094841930117, + "grad_norm": 81.72950744628906, + "learning_rate": 5e-06, + "loss": 1.1902, + "num_input_tokens_seen": 26637704, + "step": 425 + }, + { + "epoch": 1.4143094841930117, + "loss": 0.9354544878005981, + "loss_ce": 0.0003958155866712332, + "loss_iou": 0.341796875, + "loss_num": 0.050537109375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 26637704, + "step": 425 + }, + { + "epoch": 1.4176372712146423, + "grad_norm": 29.790283203125, + "learning_rate": 5e-06, + "loss": 1.1938, + "num_input_tokens_seen": 26701468, + "step": 426 + }, + { + "epoch": 1.4176372712146423, + "loss": 0.8883048892021179, + "loss_ce": 0.002562721725553274, + "loss_iou": 0.228515625, + "loss_num": 0.08544921875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 26701468, + "step": 426 + }, + { + "epoch": 1.420965058236273, + "grad_norm": 16.44548797607422, + "learning_rate": 5e-06, + "loss": 1.1486, + "num_input_tokens_seen": 26764044, + "step": 427 + }, + { + "epoch": 1.420965058236273, + "loss": 1.1505101919174194, + "loss_ce": 0.0006078255828469992, + "loss_iou": 0.337890625, + "loss_num": 0.0947265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 26764044, + "step": 427 + }, + { + "epoch": 1.4242928452579036, + "grad_norm": 194.185791015625, + "learning_rate": 5e-06, + "loss": 0.9832, + "num_input_tokens_seen": 26826320, + "step": 428 + }, + { + "epoch": 1.4242928452579036, + "loss": 0.9561614990234375, + "loss_ce": 0.004989705514162779, + "loss_iou": 0.212890625, + "loss_num": 0.10546875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 26826320, + "step": 428 + }, + { + "epoch": 1.4276206322795342, + "grad_norm": 27.536348342895508, + "learning_rate": 5e-06, + "loss": 1.0146, + "num_input_tokens_seen": 26889240, + "step": 429 + }, + { + "epoch": 1.4276206322795342, + "loss": 1.2331770658493042, + "loss_ce": 0.004661452490836382, + "loss_iou": 0.40625, + "loss_num": 0.0830078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 26889240, + "step": 429 + }, + { + "epoch": 1.4309484193011648, + "grad_norm": 23.240528106689453, + "learning_rate": 5e-06, + "loss": 1.2288, + "num_input_tokens_seen": 26952020, + "step": 430 + }, + { + "epoch": 1.4309484193011648, + "loss": 1.34483003616333, + "loss_ce": 0.00010352435492677614, + "loss_iou": 0.423828125, + "loss_num": 0.09912109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 26952020, + "step": 430 + }, + { + "epoch": 1.4342762063227954, + "grad_norm": 14.340412139892578, + "learning_rate": 5e-06, + "loss": 1.1576, + "num_input_tokens_seen": 27015052, + "step": 431 + }, + { + "epoch": 1.4342762063227954, + "loss": 1.1539814472198486, + "loss_ce": 0.0004169994790572673, + "loss_iou": 0.375, + "loss_num": 0.0810546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 27015052, + "step": 431 + }, + { + "epoch": 1.437603993344426, + "grad_norm": 13.505350112915039, + "learning_rate": 5e-06, + "loss": 0.9107, + "num_input_tokens_seen": 27077640, + "step": 432 + }, + { + "epoch": 1.437603993344426, + "loss": 0.8342275023460388, + "loss_ce": 0.0010976643534377217, + "loss_iou": 0.265625, + "loss_num": 0.060546875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 27077640, + "step": 432 + }, + { + "epoch": 1.4409317803660566, + "grad_norm": 15.690736770629883, + "learning_rate": 5e-06, + "loss": 0.8302, + "num_input_tokens_seen": 27140080, + "step": 433 + }, + { + "epoch": 1.4409317803660566, + "loss": 0.9211246967315674, + "loss_ce": 0.0016911044949665666, + "loss_iou": 0.287109375, + "loss_num": 0.06884765625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 27140080, + "step": 433 + }, + { + "epoch": 1.4442595673876872, + "grad_norm": 13.289794921875, + "learning_rate": 5e-06, + "loss": 0.8306, + "num_input_tokens_seen": 27203336, + "step": 434 + }, + { + "epoch": 1.4442595673876872, + "loss": 0.7346397042274475, + "loss_ce": 0.00050883594667539, + "loss_iou": 0.267578125, + "loss_num": 0.03955078125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 27203336, + "step": 434 + }, + { + "epoch": 1.4475873544093179, + "grad_norm": 11.401854515075684, + "learning_rate": 5e-06, + "loss": 0.9116, + "num_input_tokens_seen": 27265880, + "step": 435 + }, + { + "epoch": 1.4475873544093179, + "loss": 0.9152499437332153, + "loss_ce": 0.0006992316339164972, + "loss_iou": 0.30859375, + "loss_num": 0.05908203125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 27265880, + "step": 435 + }, + { + "epoch": 1.4509151414309485, + "grad_norm": 10.400731086730957, + "learning_rate": 5e-06, + "loss": 1.1403, + "num_input_tokens_seen": 27329740, + "step": 436 + }, + { + "epoch": 1.4509151414309485, + "loss": 1.0320229530334473, + "loss_ce": 0.0005287755047902465, + "loss_iou": 0.345703125, + "loss_num": 0.068359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 27329740, + "step": 436 + }, + { + "epoch": 1.454242928452579, + "grad_norm": 18.541240692138672, + "learning_rate": 5e-06, + "loss": 0.7472, + "num_input_tokens_seen": 27392388, + "step": 437 + }, + { + "epoch": 1.454242928452579, + "loss": 0.6203223466873169, + "loss_ce": 0.0013037655735388398, + "loss_iou": 0.130859375, + "loss_num": 0.0712890625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 27392388, + "step": 437 + }, + { + "epoch": 1.4575707154742097, + "grad_norm": 22.43004035949707, + "learning_rate": 5e-06, + "loss": 0.9605, + "num_input_tokens_seen": 27453448, + "step": 438 + }, + { + "epoch": 1.4575707154742097, + "loss": 0.6307517290115356, + "loss_ce": 0.00013653574569616467, + "loss_iou": 0.103515625, + "loss_num": 0.08447265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 27453448, + "step": 438 + }, + { + "epoch": 1.4608985024958403, + "grad_norm": 13.829190254211426, + "learning_rate": 5e-06, + "loss": 0.9866, + "num_input_tokens_seen": 27515228, + "step": 439 + }, + { + "epoch": 1.4608985024958403, + "loss": 0.9368977546691895, + "loss_ce": 0.004280570894479752, + "loss_iou": 0.2890625, + "loss_num": 0.07080078125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 27515228, + "step": 439 + }, + { + "epoch": 1.464226289517471, + "grad_norm": 14.281774520874023, + "learning_rate": 5e-06, + "loss": 0.8164, + "num_input_tokens_seen": 27577956, + "step": 440 + }, + { + "epoch": 1.464226289517471, + "loss": 0.7943712472915649, + "loss_ce": 0.002379045821726322, + "loss_iou": 0.232421875, + "loss_num": 0.06591796875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 27577956, + "step": 440 + }, + { + "epoch": 1.4675540765391015, + "grad_norm": 11.133589744567871, + "learning_rate": 5e-06, + "loss": 1.0627, + "num_input_tokens_seen": 27641140, + "step": 441 + }, + { + "epoch": 1.4675540765391015, + "loss": 1.2072147130966187, + "loss_ce": 0.0011600162833929062, + "loss_iou": 0.357421875, + "loss_num": 0.09814453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 27641140, + "step": 441 + }, + { + "epoch": 1.4708818635607321, + "grad_norm": 15.000788688659668, + "learning_rate": 5e-06, + "loss": 0.919, + "num_input_tokens_seen": 27702864, + "step": 442 + }, + { + "epoch": 1.4708818635607321, + "loss": 1.0833325386047363, + "loss_ce": 0.0015454718377441168, + "loss_iou": 0.353515625, + "loss_num": 0.0751953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 27702864, + "step": 442 + }, + { + "epoch": 1.4742096505823628, + "grad_norm": 10.785703659057617, + "learning_rate": 5e-06, + "loss": 0.9502, + "num_input_tokens_seen": 27766516, + "step": 443 + }, + { + "epoch": 1.4742096505823628, + "loss": 0.9589688777923584, + "loss_ce": 0.0007169640157371759, + "loss_iou": 0.31640625, + "loss_num": 0.0654296875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 27766516, + "step": 443 + }, + { + "epoch": 1.4775374376039934, + "grad_norm": 14.798559188842773, + "learning_rate": 5e-06, + "loss": 1.033, + "num_input_tokens_seen": 27829568, + "step": 444 + }, + { + "epoch": 1.4775374376039934, + "loss": 1.023010015487671, + "loss_ce": 0.00030495758983306587, + "loss_iou": 0.3515625, + "loss_num": 0.0634765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 27829568, + "step": 444 + }, + { + "epoch": 1.480865224625624, + "grad_norm": 11.827593803405762, + "learning_rate": 5e-06, + "loss": 0.6733, + "num_input_tokens_seen": 27892536, + "step": 445 + }, + { + "epoch": 1.480865224625624, + "loss": 0.5998960733413696, + "loss_ce": 0.0006834049127064645, + "loss_iou": 0.19140625, + "loss_num": 0.043212890625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 27892536, + "step": 445 + }, + { + "epoch": 1.4841930116472546, + "grad_norm": 12.027179718017578, + "learning_rate": 5e-06, + "loss": 0.9374, + "num_input_tokens_seen": 27954480, + "step": 446 + }, + { + "epoch": 1.4841930116472546, + "loss": 0.8107286095619202, + "loss_ce": 0.005674911662936211, + "loss_iou": 0.251953125, + "loss_num": 0.060302734375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 27954480, + "step": 446 + }, + { + "epoch": 1.4875207986688852, + "grad_norm": 12.46907901763916, + "learning_rate": 5e-06, + "loss": 0.9105, + "num_input_tokens_seen": 28015384, + "step": 447 + }, + { + "epoch": 1.4875207986688852, + "loss": 1.007506012916565, + "loss_ce": 0.00018174726574216038, + "loss_iou": 0.24609375, + "loss_num": 0.10302734375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 28015384, + "step": 447 + }, + { + "epoch": 1.4908485856905158, + "grad_norm": 19.959171295166016, + "learning_rate": 5e-06, + "loss": 0.984, + "num_input_tokens_seen": 28077524, + "step": 448 + }, + { + "epoch": 1.4908485856905158, + "loss": 1.0535316467285156, + "loss_ce": 0.0010414018761366606, + "loss_iou": 0.3046875, + "loss_num": 0.08837890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 28077524, + "step": 448 + }, + { + "epoch": 1.4941763727121464, + "grad_norm": 20.844703674316406, + "learning_rate": 5e-06, + "loss": 0.9459, + "num_input_tokens_seen": 28139404, + "step": 449 + }, + { + "epoch": 1.4941763727121464, + "loss": 0.9492704272270203, + "loss_ce": 0.0007841180195100605, + "loss_iou": 0.21875, + "loss_num": 0.10205078125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 28139404, + "step": 449 + }, + { + "epoch": 1.497504159733777, + "grad_norm": 25.41834831237793, + "learning_rate": 5e-06, + "loss": 1.3058, + "num_input_tokens_seen": 28201620, + "step": 450 + }, + { + "epoch": 1.497504159733777, + "loss": 1.2704012393951416, + "loss_ce": 0.0028230701573193073, + "loss_iou": 0.427734375, + "loss_num": 0.0830078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 28201620, + "step": 450 + }, + { + "epoch": 1.5008319467554077, + "grad_norm": 35.88543701171875, + "learning_rate": 5e-06, + "loss": 1.1012, + "num_input_tokens_seen": 28265268, + "step": 451 + }, + { + "epoch": 1.5008319467554077, + "loss": 0.7575756311416626, + "loss_ce": 0.001472145551815629, + "loss_iou": 0.287109375, + "loss_num": 0.035888671875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 28265268, + "step": 451 + }, + { + "epoch": 1.5041597337770383, + "grad_norm": 9.400871276855469, + "learning_rate": 5e-06, + "loss": 0.7297, + "num_input_tokens_seen": 28328344, + "step": 452 + }, + { + "epoch": 1.5041597337770383, + "loss": 0.6579368114471436, + "loss_ce": 0.0014427044661715627, + "loss_iou": 0.216796875, + "loss_num": 0.044677734375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 28328344, + "step": 452 + }, + { + "epoch": 1.5074875207986689, + "grad_norm": 16.661739349365234, + "learning_rate": 5e-06, + "loss": 1.0671, + "num_input_tokens_seen": 28390980, + "step": 453 + }, + { + "epoch": 1.5074875207986689, + "loss": 1.226120114326477, + "loss_ce": 0.0010225145379081368, + "loss_iou": 0.345703125, + "loss_num": 0.1064453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 28390980, + "step": 453 + }, + { + "epoch": 1.5108153078202995, + "grad_norm": 16.2419490814209, + "learning_rate": 5e-06, + "loss": 0.9537, + "num_input_tokens_seen": 28453244, + "step": 454 + }, + { + "epoch": 1.5108153078202995, + "loss": 0.9552655816078186, + "loss_ce": 0.0018964293412864208, + "loss_iou": 0.1962890625, + "loss_num": 0.1123046875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 28453244, + "step": 454 + }, + { + "epoch": 1.51414309484193, + "grad_norm": 22.404499053955078, + "learning_rate": 5e-06, + "loss": 0.8511, + "num_input_tokens_seen": 28515280, + "step": 455 + }, + { + "epoch": 1.51414309484193, + "loss": 0.9287418127059937, + "loss_ce": 0.006378548685461283, + "loss_iou": 0.2734375, + "loss_num": 0.0751953125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 28515280, + "step": 455 + }, + { + "epoch": 1.5174708818635607, + "grad_norm": 23.939455032348633, + "learning_rate": 5e-06, + "loss": 1.3932, + "num_input_tokens_seen": 28578200, + "step": 456 + }, + { + "epoch": 1.5174708818635607, + "loss": 1.485808253288269, + "loss_ce": 0.004362954758107662, + "loss_iou": 0.4765625, + "loss_num": 0.10498046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 28578200, + "step": 456 + }, + { + "epoch": 1.5207986688851913, + "grad_norm": 19.551912307739258, + "learning_rate": 5e-06, + "loss": 0.9394, + "num_input_tokens_seen": 28642336, + "step": 457 + }, + { + "epoch": 1.5207986688851913, + "loss": 0.8611233234405518, + "loss_ce": 3.935792119591497e-05, + "loss_iou": 0.328125, + "loss_num": 0.041015625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 28642336, + "step": 457 + }, + { + "epoch": 1.524126455906822, + "grad_norm": 33.463558197021484, + "learning_rate": 5e-06, + "loss": 1.3023, + "num_input_tokens_seen": 28705660, + "step": 458 + }, + { + "epoch": 1.524126455906822, + "loss": 1.4212629795074463, + "loss_ce": 0.0020735724829137325, + "loss_iou": 0.4296875, + "loss_num": 0.1123046875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 28705660, + "step": 458 + }, + { + "epoch": 1.5274542429284526, + "grad_norm": 32.06713104248047, + "learning_rate": 5e-06, + "loss": 0.8497, + "num_input_tokens_seen": 28768376, + "step": 459 + }, + { + "epoch": 1.5274542429284526, + "loss": 0.9550895690917969, + "loss_ce": 0.0004996892530471087, + "loss_iou": 0.357421875, + "loss_num": 0.04833984375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 28768376, + "step": 459 + }, + { + "epoch": 1.5307820299500832, + "grad_norm": 21.96576690673828, + "learning_rate": 5e-06, + "loss": 0.8911, + "num_input_tokens_seen": 28831008, + "step": 460 + }, + { + "epoch": 1.5307820299500832, + "loss": 0.796554684638977, + "loss_ce": 0.0021211060229688883, + "loss_iou": 0.1943359375, + "loss_num": 0.0810546875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 28831008, + "step": 460 + }, + { + "epoch": 1.5341098169717138, + "grad_norm": 12.529973030090332, + "learning_rate": 5e-06, + "loss": 1.0927, + "num_input_tokens_seen": 28894052, + "step": 461 + }, + { + "epoch": 1.5341098169717138, + "loss": 0.7951915264129639, + "loss_ce": 0.004420042969286442, + "loss_iou": 0.1484375, + "loss_num": 0.0986328125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 28894052, + "step": 461 + }, + { + "epoch": 1.5374376039933444, + "grad_norm": 21.059383392333984, + "learning_rate": 5e-06, + "loss": 0.8035, + "num_input_tokens_seen": 28956284, + "step": 462 + }, + { + "epoch": 1.5374376039933444, + "loss": 0.6268208026885986, + "loss_ce": 0.001698711421340704, + "loss_iou": 0.1845703125, + "loss_num": 0.05126953125, + "loss_xval": 0.625, + "num_input_tokens_seen": 28956284, + "step": 462 + }, + { + "epoch": 1.540765391014975, + "grad_norm": 17.505203247070312, + "learning_rate": 5e-06, + "loss": 1.2985, + "num_input_tokens_seen": 29020736, + "step": 463 + }, + { + "epoch": 1.540765391014975, + "loss": 1.3152070045471191, + "loss_ce": 0.0017303972272202373, + "loss_iou": 0.42578125, + "loss_num": 0.09228515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 29020736, + "step": 463 + }, + { + "epoch": 1.5440931780366056, + "grad_norm": 13.60715389251709, + "learning_rate": 5e-06, + "loss": 0.8746, + "num_input_tokens_seen": 29082664, + "step": 464 + }, + { + "epoch": 1.5440931780366056, + "loss": 1.051095962524414, + "loss_ce": 0.002511953003704548, + "loss_iou": 0.345703125, + "loss_num": 0.0712890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 29082664, + "step": 464 + }, + { + "epoch": 1.5474209650582362, + "grad_norm": 22.727663040161133, + "learning_rate": 5e-06, + "loss": 0.8494, + "num_input_tokens_seen": 29146680, + "step": 465 + }, + { + "epoch": 1.5474209650582362, + "loss": 1.0247268676757812, + "loss_ce": 0.0010452390415593982, + "loss_iou": 0.33984375, + "loss_num": 0.06884765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 29146680, + "step": 465 + }, + { + "epoch": 1.5507487520798668, + "grad_norm": 19.767471313476562, + "learning_rate": 5e-06, + "loss": 0.9335, + "num_input_tokens_seen": 29210188, + "step": 466 + }, + { + "epoch": 1.5507487520798668, + "loss": 1.0263903141021729, + "loss_ce": 0.00368527346290648, + "loss_iou": 0.34765625, + "loss_num": 0.06494140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 29210188, + "step": 466 + }, + { + "epoch": 1.5540765391014975, + "grad_norm": 11.171165466308594, + "learning_rate": 5e-06, + "loss": 0.9697, + "num_input_tokens_seen": 29272836, + "step": 467 + }, + { + "epoch": 1.5540765391014975, + "loss": 0.898423969745636, + "loss_ce": 0.006089954171329737, + "loss_iou": 0.25390625, + "loss_num": 0.07763671875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 29272836, + "step": 467 + }, + { + "epoch": 1.557404326123128, + "grad_norm": 12.950803756713867, + "learning_rate": 5e-06, + "loss": 0.8517, + "num_input_tokens_seen": 29334744, + "step": 468 + }, + { + "epoch": 1.557404326123128, + "loss": 1.0840175151824951, + "loss_ce": 0.008089832961559296, + "loss_iou": 0.33203125, + "loss_num": 0.08203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 29334744, + "step": 468 + }, + { + "epoch": 1.5607321131447587, + "grad_norm": 43.52952194213867, + "learning_rate": 5e-06, + "loss": 1.2985, + "num_input_tokens_seen": 29398080, + "step": 469 + }, + { + "epoch": 1.5607321131447587, + "loss": 1.0282366275787354, + "loss_ce": 0.003090148326009512, + "loss_iou": 0.28125, + "loss_num": 0.0927734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 29398080, + "step": 469 + }, + { + "epoch": 1.5640599001663893, + "grad_norm": 49.79926681518555, + "learning_rate": 5e-06, + "loss": 0.9525, + "num_input_tokens_seen": 29461576, + "step": 470 + }, + { + "epoch": 1.5640599001663893, + "loss": 0.6735060214996338, + "loss_ce": 0.0019971991423517466, + "loss_iou": 0.23046875, + "loss_num": 0.042236328125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 29461576, + "step": 470 + }, + { + "epoch": 1.56738768718802, + "grad_norm": 13.252382278442383, + "learning_rate": 5e-06, + "loss": 0.929, + "num_input_tokens_seen": 29524476, + "step": 471 + }, + { + "epoch": 1.56738768718802, + "loss": 0.9817473888397217, + "loss_ce": 0.002011068630963564, + "loss_iou": 0.33984375, + "loss_num": 0.06005859375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 29524476, + "step": 471 + }, + { + "epoch": 1.5707154742096505, + "grad_norm": 14.923233985900879, + "learning_rate": 5e-06, + "loss": 1.1227, + "num_input_tokens_seen": 29587796, + "step": 472 + }, + { + "epoch": 1.5707154742096505, + "loss": 1.3451569080352783, + "loss_ce": 0.00043034314876422286, + "loss_iou": 0.443359375, + "loss_num": 0.0908203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 29587796, + "step": 472 + }, + { + "epoch": 1.5740432612312811, + "grad_norm": 15.69069766998291, + "learning_rate": 5e-06, + "loss": 0.9072, + "num_input_tokens_seen": 29650672, + "step": 473 + }, + { + "epoch": 1.5740432612312811, + "loss": 0.7996507883071899, + "loss_ce": 0.003264050930738449, + "loss_iou": 0.2490234375, + "loss_num": 0.0595703125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 29650672, + "step": 473 + }, + { + "epoch": 1.5773710482529117, + "grad_norm": 28.606983184814453, + "learning_rate": 5e-06, + "loss": 0.9883, + "num_input_tokens_seen": 29714604, + "step": 474 + }, + { + "epoch": 1.5773710482529117, + "loss": 0.6658027768135071, + "loss_ce": 0.0002754491288214922, + "loss_iou": 0.2099609375, + "loss_num": 0.04931640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 29714604, + "step": 474 + }, + { + "epoch": 1.5806988352745424, + "grad_norm": 14.635608673095703, + "learning_rate": 5e-06, + "loss": 0.9022, + "num_input_tokens_seen": 29777760, + "step": 475 + }, + { + "epoch": 1.5806988352745424, + "loss": 0.7488625049591064, + "loss_ce": 0.012534376233816147, + "loss_iou": 0.2421875, + "loss_num": 0.050537109375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 29777760, + "step": 475 + }, + { + "epoch": 1.584026622296173, + "grad_norm": 23.2712459564209, + "learning_rate": 5e-06, + "loss": 0.7148, + "num_input_tokens_seen": 29838992, + "step": 476 + }, + { + "epoch": 1.584026622296173, + "loss": 0.6552135944366455, + "loss_ce": 0.00030633312417194247, + "loss_iou": 0.216796875, + "loss_num": 0.04443359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 29838992, + "step": 476 + }, + { + "epoch": 1.5873544093178036, + "grad_norm": 15.995981216430664, + "learning_rate": 5e-06, + "loss": 0.807, + "num_input_tokens_seen": 29902632, + "step": 477 + }, + { + "epoch": 1.5873544093178036, + "loss": 0.7012355327606201, + "loss_ce": 0.0023830283898860216, + "loss_iou": 0.232421875, + "loss_num": 0.046875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 29902632, + "step": 477 + }, + { + "epoch": 1.5906821963394342, + "grad_norm": 17.334726333618164, + "learning_rate": 5e-06, + "loss": 0.8927, + "num_input_tokens_seen": 29965592, + "step": 478 + }, + { + "epoch": 1.5906821963394342, + "loss": 1.0914649963378906, + "loss_ce": 0.0006445984472520649, + "loss_iou": 0.345703125, + "loss_num": 0.080078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 29965592, + "step": 478 + }, + { + "epoch": 1.5940099833610648, + "grad_norm": 22.14893913269043, + "learning_rate": 5e-06, + "loss": 0.9355, + "num_input_tokens_seen": 30029944, + "step": 479 + }, + { + "epoch": 1.5940099833610648, + "loss": 0.8586336374282837, + "loss_ce": 0.0011202013120055199, + "loss_iou": 0.296875, + "loss_num": 0.052734375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 30029944, + "step": 479 + }, + { + "epoch": 1.5973377703826954, + "grad_norm": 15.957133293151855, + "learning_rate": 5e-06, + "loss": 0.7887, + "num_input_tokens_seen": 30091948, + "step": 480 + }, + { + "epoch": 1.5973377703826954, + "loss": 0.7341344356536865, + "loss_ce": 0.0012242539087310433, + "loss_iou": 0.19140625, + "loss_num": 0.0703125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 30091948, + "step": 480 + }, + { + "epoch": 1.600665557404326, + "grad_norm": 12.82165241241455, + "learning_rate": 5e-06, + "loss": 0.9523, + "num_input_tokens_seen": 30153712, + "step": 481 + }, + { + "epoch": 1.600665557404326, + "loss": 0.8892770409584045, + "loss_ce": 0.0006051433738321066, + "loss_iou": 0.328125, + "loss_num": 0.046630859375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 30153712, + "step": 481 + }, + { + "epoch": 1.6039933444259566, + "grad_norm": 16.93235206604004, + "learning_rate": 5e-06, + "loss": 1.0522, + "num_input_tokens_seen": 30216844, + "step": 482 + }, + { + "epoch": 1.6039933444259566, + "loss": 1.1036779880523682, + "loss_ce": 0.0011389621067792177, + "loss_iou": 0.267578125, + "loss_num": 0.11376953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 30216844, + "step": 482 + }, + { + "epoch": 1.6073211314475873, + "grad_norm": 12.133223533630371, + "learning_rate": 5e-06, + "loss": 0.8506, + "num_input_tokens_seen": 30278368, + "step": 483 + }, + { + "epoch": 1.6073211314475873, + "loss": 0.9850232601165771, + "loss_ce": 0.0001599617680767551, + "loss_iou": 0.30859375, + "loss_num": 0.07373046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 30278368, + "step": 483 + }, + { + "epoch": 1.6106489184692179, + "grad_norm": 44.32785415649414, + "learning_rate": 5e-06, + "loss": 0.9432, + "num_input_tokens_seen": 30339888, + "step": 484 + }, + { + "epoch": 1.6106489184692179, + "loss": 1.1375561952590942, + "loss_ce": 0.00022712742793373764, + "loss_iou": 0.306640625, + "loss_num": 0.10498046875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 30339888, + "step": 484 + }, + { + "epoch": 1.6139767054908485, + "grad_norm": 10.50809383392334, + "learning_rate": 5e-06, + "loss": 0.9693, + "num_input_tokens_seen": 30402144, + "step": 485 + }, + { + "epoch": 1.6139767054908485, + "loss": 0.9972724914550781, + "loss_ce": 0.000446375401224941, + "loss_iou": 0.328125, + "loss_num": 0.06787109375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 30402144, + "step": 485 + }, + { + "epoch": 1.617304492512479, + "grad_norm": 15.496356010437012, + "learning_rate": 5e-06, + "loss": 1.2141, + "num_input_tokens_seen": 30465452, + "step": 486 + }, + { + "epoch": 1.617304492512479, + "loss": 1.3970156908035278, + "loss_ce": 0.0010195414070039988, + "loss_iou": 0.421875, + "loss_num": 0.1103515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 30465452, + "step": 486 + }, + { + "epoch": 1.6206322795341097, + "grad_norm": 36.90744400024414, + "learning_rate": 5e-06, + "loss": 1.0141, + "num_input_tokens_seen": 30529608, + "step": 487 + }, + { + "epoch": 1.6206322795341097, + "loss": 1.0115238428115845, + "loss_ce": 0.0037113595753908157, + "loss_iou": 0.365234375, + "loss_num": 0.05517578125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 30529608, + "step": 487 + }, + { + "epoch": 1.6239600665557403, + "grad_norm": 20.59255027770996, + "learning_rate": 5e-06, + "loss": 1.0354, + "num_input_tokens_seen": 30591308, + "step": 488 + }, + { + "epoch": 1.6239600665557403, + "loss": 1.2096441984176636, + "loss_ce": 0.0016364282928407192, + "loss_iou": 0.375, + "loss_num": 0.091796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 30591308, + "step": 488 + }, + { + "epoch": 1.627287853577371, + "grad_norm": 24.88413429260254, + "learning_rate": 5e-06, + "loss": 0.9571, + "num_input_tokens_seen": 30654808, + "step": 489 + }, + { + "epoch": 1.627287853577371, + "loss": 0.9968461394309998, + "loss_ce": 0.003926191478967667, + "loss_iou": 0.32421875, + "loss_num": 0.06884765625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 30654808, + "step": 489 + }, + { + "epoch": 1.6306156405990015, + "grad_norm": 24.321149826049805, + "learning_rate": 5e-06, + "loss": 1.1118, + "num_input_tokens_seen": 30719144, + "step": 490 + }, + { + "epoch": 1.6306156405990015, + "loss": 0.9674986600875854, + "loss_ce": 0.00192247552331537, + "loss_iou": 0.322265625, + "loss_num": 0.064453125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 30719144, + "step": 490 + }, + { + "epoch": 1.6339434276206322, + "grad_norm": 18.081993103027344, + "learning_rate": 5e-06, + "loss": 0.905, + "num_input_tokens_seen": 30780976, + "step": 491 + }, + { + "epoch": 1.6339434276206322, + "loss": 0.8123141527175903, + "loss_ce": 0.0009127893717959523, + "loss_iou": 0.2353515625, + "loss_num": 0.06787109375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 30780976, + "step": 491 + }, + { + "epoch": 1.6372712146422628, + "grad_norm": 38.994476318359375, + "learning_rate": 5e-06, + "loss": 0.9762, + "num_input_tokens_seen": 30843968, + "step": 492 + }, + { + "epoch": 1.6372712146422628, + "loss": 0.8970677852630615, + "loss_ce": 9.50983667280525e-05, + "loss_iou": 0.26171875, + "loss_num": 0.07470703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 30843968, + "step": 492 + }, + { + "epoch": 1.6405990016638934, + "grad_norm": 19.654754638671875, + "learning_rate": 5e-06, + "loss": 1.1088, + "num_input_tokens_seen": 30904740, + "step": 493 + }, + { + "epoch": 1.6405990016638934, + "loss": 1.1129732131958008, + "loss_ce": 0.0016451734118163586, + "loss_iou": 0.330078125, + "loss_num": 0.09033203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 30904740, + "step": 493 + }, + { + "epoch": 1.643926788685524, + "grad_norm": 18.252052307128906, + "learning_rate": 5e-06, + "loss": 0.7697, + "num_input_tokens_seen": 30968000, + "step": 494 + }, + { + "epoch": 1.643926788685524, + "loss": 0.8988114595413208, + "loss_ce": 0.0008622044115327299, + "loss_iou": 0.34765625, + "loss_num": 0.040771484375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 30968000, + "step": 494 + }, + { + "epoch": 1.6472545757071546, + "grad_norm": 11.877856254577637, + "learning_rate": 5e-06, + "loss": 0.7778, + "num_input_tokens_seen": 31031844, + "step": 495 + }, + { + "epoch": 1.6472545757071546, + "loss": 0.8768675327301025, + "loss_ce": 0.0011351365828886628, + "loss_iou": 0.2314453125, + "loss_num": 0.08251953125, + "loss_xval": 0.875, + "num_input_tokens_seen": 31031844, + "step": 495 + }, + { + "epoch": 1.6505823627287852, + "grad_norm": 16.35790252685547, + "learning_rate": 5e-06, + "loss": 1.1891, + "num_input_tokens_seen": 31095556, + "step": 496 + }, + { + "epoch": 1.6505823627287852, + "loss": 1.4308223724365234, + "loss_ce": 0.0001583620032761246, + "loss_iou": 0.4453125, + "loss_num": 0.1083984375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 31095556, + "step": 496 + }, + { + "epoch": 1.6539101497504158, + "grad_norm": 12.555450439453125, + "learning_rate": 5e-06, + "loss": 0.8734, + "num_input_tokens_seen": 31158156, + "step": 497 + }, + { + "epoch": 1.6539101497504158, + "loss": 0.7806690335273743, + "loss_ce": 0.007719771936535835, + "loss_iou": 0.259765625, + "loss_num": 0.051025390625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 31158156, + "step": 497 + }, + { + "epoch": 1.6572379367720464, + "grad_norm": 12.337371826171875, + "learning_rate": 5e-06, + "loss": 1.1544, + "num_input_tokens_seen": 31221496, + "step": 498 + }, + { + "epoch": 1.6572379367720464, + "loss": 1.3280014991760254, + "loss_ce": 0.0008530584746040404, + "loss_iou": 0.4375, + "loss_num": 0.0908203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 31221496, + "step": 498 + }, + { + "epoch": 1.660565723793677, + "grad_norm": 16.143665313720703, + "learning_rate": 5e-06, + "loss": 1.252, + "num_input_tokens_seen": 31285016, + "step": 499 + }, + { + "epoch": 1.660565723793677, + "loss": 1.2564626932144165, + "loss_ce": 0.0003592088760342449, + "loss_iou": 0.435546875, + "loss_num": 0.07666015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 31285016, + "step": 499 + }, + { + "epoch": 1.6638935108153077, + "grad_norm": 10.787676811218262, + "learning_rate": 5e-06, + "loss": 0.975, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "eval_seeclick_CIoU": 0.13085231557488441, + "eval_seeclick_GIoU": 0.1558702141046524, + "eval_seeclick_IoU": 0.23901727050542831, + "eval_seeclick_MAE_all": 0.18714486062526703, + "eval_seeclick_MAE_h": 0.07685794495046139, + "eval_seeclick_MAE_w": 0.13620607554912567, + "eval_seeclick_MAE_x_boxes": 0.32132112979888916, + "eval_seeclick_MAE_y_boxes": 0.09800946339964867, + "eval_seeclick_NUM_probability": 0.9999272227287292, + "eval_seeclick_inside_bbox": 0.3541666716337204, + "eval_seeclick_loss": 2.6794745922088623, + "eval_seeclick_loss_ce": 0.06953983008861542, + "eval_seeclick_loss_iou": 0.837890625, + "eval_seeclick_loss_num": 0.179595947265625, + "eval_seeclick_loss_xval": 2.574462890625, + "eval_seeclick_runtime": 65.7063, + "eval_seeclick_samples_per_second": 0.715, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "eval_icons_CIoU": 0.07393881864845753, + "eval_icons_GIoU": 0.19662170112133026, + "eval_icons_IoU": 0.21981175243854523, + "eval_icons_MAE_all": 0.13982237502932549, + "eval_icons_MAE_h": 0.07564222812652588, + "eval_icons_MAE_w": 0.15035200864076614, + "eval_icons_MAE_x_boxes": 0.12051251530647278, + "eval_icons_MAE_y_boxes": 0.049114322289824486, + "eval_icons_NUM_probability": 0.9999871253967285, + "eval_icons_inside_bbox": 0.3420138955116272, + "eval_icons_loss": 2.3178176879882812, + "eval_icons_loss_ce": 5.301104465615936e-06, + "eval_icons_loss_iou": 0.798095703125, + "eval_icons_loss_num": 0.14259719848632812, + "eval_icons_loss_xval": 2.310546875, + "eval_icons_runtime": 65.5349, + "eval_icons_samples_per_second": 0.763, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "eval_screenspot_CIoU": 0.03867738569776217, + "eval_screenspot_GIoU": 0.1068417305747668, + "eval_screenspot_IoU": 0.18955722451210022, + "eval_screenspot_MAE_all": 0.20505708952744803, + "eval_screenspot_MAE_h": 0.07092766960461934, + "eval_screenspot_MAE_w": 0.17401040097077689, + "eval_screenspot_MAE_x_boxes": 0.2769670287768046, + "eval_screenspot_MAE_y_boxes": 0.1273154765367508, + "eval_screenspot_NUM_probability": 0.9997655948003134, + "eval_screenspot_inside_bbox": 0.31791667143503827, + "eval_screenspot_loss": 2.826397657394409, + "eval_screenspot_loss_ce": 0.0007081345732634267, + "eval_screenspot_loss_iou": 0.9080403645833334, + "eval_screenspot_loss_num": 0.20873006184895834, + "eval_screenspot_loss_xval": 2.8587239583333335, + "eval_screenspot_runtime": 123.0498, + "eval_screenspot_samples_per_second": 0.723, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "eval_compot_CIoU": -0.07353978231549263, + "eval_compot_GIoU": 0.04984145052731037, + "eval_compot_IoU": 0.10792786628007889, + "eval_compot_MAE_all": 0.2465325966477394, + "eval_compot_MAE_h": 0.08729888498783112, + "eval_compot_MAE_w": 0.27648504078388214, + "eval_compot_MAE_x_boxes": 0.19644539058208466, + "eval_compot_MAE_y_boxes": 0.16193149238824844, + "eval_compot_NUM_probability": 0.9998366832733154, + "eval_compot_inside_bbox": 0.16840277798473835, + "eval_compot_loss": 3.0365827083587646, + "eval_compot_loss_ce": 0.005284860031679273, + "eval_compot_loss_iou": 0.947509765625, + "eval_compot_loss_num": 0.235687255859375, + "eval_compot_loss_xval": 3.072265625, + "eval_compot_runtime": 67.1863, + "eval_compot_samples_per_second": 0.744, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "eval_custom_ui_MAE_all": 0.09588127583265305, + "eval_custom_ui_MAE_x": 0.10331301391124725, + "eval_custom_ui_MAE_y": 0.08844954147934914, + "eval_custom_ui_NUM_probability": 0.9999941885471344, + "eval_custom_ui_loss": 0.48714783787727356, + "eval_custom_ui_loss_ce": 0.00020886939091724344, + "eval_custom_ui_loss_num": 0.0944366455078125, + "eval_custom_ui_loss_xval": 0.4720458984375, + "eval_custom_ui_runtime": 51.6926, + "eval_custom_ui_samples_per_second": 0.967, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6638935108153077, + "loss": 0.44073396921157837, + "loss_ce": 0.0003042669268324971, + "loss_iou": 0.0, + "loss_num": 0.087890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 31348120, + "step": 500 + }, + { + "epoch": 1.6672212978369383, + "grad_norm": 19.1532039642334, + "learning_rate": 5e-06, + "loss": 1.0931, + "num_input_tokens_seen": 31411648, + "step": 501 + }, + { + "epoch": 1.6672212978369383, + "loss": 1.0893843173980713, + "loss_ce": 0.0014936134684830904, + "loss_iou": 0.345703125, + "loss_num": 0.0791015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 31411648, + "step": 501 + }, + { + "epoch": 1.670549084858569, + "grad_norm": 41.87156295776367, + "learning_rate": 5e-06, + "loss": 1.1134, + "num_input_tokens_seen": 31474148, + "step": 502 + }, + { + "epoch": 1.670549084858569, + "loss": 0.7424819469451904, + "loss_ce": 0.0009047660860233009, + "loss_iou": 0.21875, + "loss_num": 0.06103515625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 31474148, + "step": 502 + }, + { + "epoch": 1.6738768718801995, + "grad_norm": 26.744535446166992, + "learning_rate": 5e-06, + "loss": 0.9912, + "num_input_tokens_seen": 31538180, + "step": 503 + }, + { + "epoch": 1.6738768718801995, + "loss": 1.1642227172851562, + "loss_ce": 0.0023575148079544306, + "loss_iou": 0.396484375, + "loss_num": 0.07421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 31538180, + "step": 503 + }, + { + "epoch": 1.6772046589018301, + "grad_norm": 10.769326210021973, + "learning_rate": 5e-06, + "loss": 1.1929, + "num_input_tokens_seen": 31601972, + "step": 504 + }, + { + "epoch": 1.6772046589018301, + "loss": 1.1387523412704468, + "loss_ce": 0.0005688150995410979, + "loss_iou": 0.3671875, + "loss_num": 0.08056640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 31601972, + "step": 504 + }, + { + "epoch": 1.6805324459234607, + "grad_norm": 24.930686950683594, + "learning_rate": 5e-06, + "loss": 1.0095, + "num_input_tokens_seen": 31665256, + "step": 505 + }, + { + "epoch": 1.6805324459234607, + "loss": 1.1174432039260864, + "loss_ce": 0.000499797286465764, + "loss_iou": 0.37109375, + "loss_num": 0.07470703125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 31665256, + "step": 505 + }, + { + "epoch": 1.6838602329450914, + "grad_norm": 11.738724708557129, + "learning_rate": 5e-06, + "loss": 0.7514, + "num_input_tokens_seen": 31726716, + "step": 506 + }, + { + "epoch": 1.6838602329450914, + "loss": 0.5557652711868286, + "loss_ce": 0.0007115780026651919, + "loss_iou": 0.173828125, + "loss_num": 0.04150390625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 31726716, + "step": 506 + }, + { + "epoch": 1.687188019966722, + "grad_norm": 8.881688117980957, + "learning_rate": 5e-06, + "loss": 0.8519, + "num_input_tokens_seen": 31789836, + "step": 507 + }, + { + "epoch": 1.687188019966722, + "loss": 0.9876424670219421, + "loss_ce": 0.00033773150062188506, + "loss_iou": 0.25, + "loss_num": 0.09716796875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 31789836, + "step": 507 + }, + { + "epoch": 1.6905158069883528, + "grad_norm": 13.23851490020752, + "learning_rate": 5e-06, + "loss": 1.0906, + "num_input_tokens_seen": 31853316, + "step": 508 + }, + { + "epoch": 1.6905158069883528, + "loss": 1.2013055086135864, + "loss_ce": 0.0006219673086889088, + "loss_iou": 0.31640625, + "loss_num": 0.11328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 31853316, + "step": 508 + }, + { + "epoch": 1.6938435940099834, + "grad_norm": 32.30921173095703, + "learning_rate": 5e-06, + "loss": 1.0942, + "num_input_tokens_seen": 31916364, + "step": 509 + }, + { + "epoch": 1.6938435940099834, + "loss": 1.0613774061203003, + "loss_ce": 0.0005864131962880492, + "loss_iou": 0.3046875, + "loss_num": 0.09033203125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 31916364, + "step": 509 + }, + { + "epoch": 1.697171381031614, + "grad_norm": 21.38906478881836, + "learning_rate": 5e-06, + "loss": 1.1213, + "num_input_tokens_seen": 31979748, + "step": 510 + }, + { + "epoch": 1.697171381031614, + "loss": 1.2083408832550049, + "loss_ce": 0.04061625152826309, + "loss_iou": 0.359375, + "loss_num": 0.08984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 31979748, + "step": 510 + }, + { + "epoch": 1.7004991680532446, + "grad_norm": 27.368276596069336, + "learning_rate": 5e-06, + "loss": 1.2503, + "num_input_tokens_seen": 32042528, + "step": 511 + }, + { + "epoch": 1.7004991680532446, + "loss": 1.3059027194976807, + "loss_ce": 0.0012151505798101425, + "loss_iou": 0.5, + "loss_num": 0.060791015625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 32042528, + "step": 511 + }, + { + "epoch": 1.7038269550748752, + "grad_norm": 11.777514457702637, + "learning_rate": 5e-06, + "loss": 1.0625, + "num_input_tokens_seen": 32105152, + "step": 512 + }, + { + "epoch": 1.7038269550748752, + "loss": 0.9886508584022522, + "loss_ce": 0.000857874343637377, + "loss_iou": 0.357421875, + "loss_num": 0.054443359375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 32105152, + "step": 512 + }, + { + "epoch": 1.7071547420965059, + "grad_norm": 19.91946792602539, + "learning_rate": 5e-06, + "loss": 0.7893, + "num_input_tokens_seen": 32167704, + "step": 513 + }, + { + "epoch": 1.7071547420965059, + "loss": 0.8008160591125488, + "loss_ce": 0.0010113224852830172, + "loss_iou": 0.265625, + "loss_num": 0.0537109375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 32167704, + "step": 513 + }, + { + "epoch": 1.7104825291181365, + "grad_norm": 24.025108337402344, + "learning_rate": 5e-06, + "loss": 1.0882, + "num_input_tokens_seen": 32231292, + "step": 514 + }, + { + "epoch": 1.7104825291181365, + "loss": 1.2957994937896729, + "loss_ce": 0.0008775498135946691, + "loss_iou": 0.453125, + "loss_num": 0.07763671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 32231292, + "step": 514 + }, + { + "epoch": 1.713810316139767, + "grad_norm": 22.541730880737305, + "learning_rate": 5e-06, + "loss": 0.9815, + "num_input_tokens_seen": 32294084, + "step": 515 + }, + { + "epoch": 1.713810316139767, + "loss": 1.1235849857330322, + "loss_ce": 0.0005380419315770268, + "loss_iou": 0.37890625, + "loss_num": 0.07275390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 32294084, + "step": 515 + }, + { + "epoch": 1.7171381031613977, + "grad_norm": 17.039142608642578, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 32355804, + "step": 516 + }, + { + "epoch": 1.7171381031613977, + "loss": 0.8609045147895813, + "loss_ce": 0.0012853904627263546, + "loss_iou": 0.228515625, + "loss_num": 0.080078125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 32355804, + "step": 516 + }, + { + "epoch": 1.7204658901830283, + "grad_norm": 17.313488006591797, + "learning_rate": 5e-06, + "loss": 1.0722, + "num_input_tokens_seen": 32418416, + "step": 517 + }, + { + "epoch": 1.7204658901830283, + "loss": 0.8409963846206665, + "loss_ce": 0.0006643726374022663, + "loss_iou": 0.283203125, + "loss_num": 0.054931640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 32418416, + "step": 517 + }, + { + "epoch": 1.723793677204659, + "grad_norm": 21.147789001464844, + "learning_rate": 5e-06, + "loss": 0.9084, + "num_input_tokens_seen": 32480452, + "step": 518 + }, + { + "epoch": 1.723793677204659, + "loss": 0.6569137573242188, + "loss_ce": 0.00041967950528487563, + "loss_iou": 0.166015625, + "loss_num": 0.06494140625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 32480452, + "step": 518 + }, + { + "epoch": 1.7271214642262895, + "grad_norm": 14.255158424377441, + "learning_rate": 5e-06, + "loss": 0.9042, + "num_input_tokens_seen": 32543352, + "step": 519 + }, + { + "epoch": 1.7271214642262895, + "loss": 0.9606277346611023, + "loss_ce": 0.0011550780618563294, + "loss_iou": 0.359375, + "loss_num": 0.0478515625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 32543352, + "step": 519 + }, + { + "epoch": 1.7304492512479202, + "grad_norm": 23.895132064819336, + "learning_rate": 5e-06, + "loss": 1.0736, + "num_input_tokens_seen": 32606656, + "step": 520 + }, + { + "epoch": 1.7304492512479202, + "loss": 0.9719319343566895, + "loss_ce": 0.004036390222609043, + "loss_iou": 0.306640625, + "loss_num": 0.07080078125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 32606656, + "step": 520 + }, + { + "epoch": 1.7337770382695508, + "grad_norm": 28.39651870727539, + "learning_rate": 5e-06, + "loss": 1.2689, + "num_input_tokens_seen": 32668332, + "step": 521 + }, + { + "epoch": 1.7337770382695508, + "loss": 1.0842338800430298, + "loss_ce": 0.001958527136594057, + "loss_iou": 0.3203125, + "loss_num": 0.0888671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 32668332, + "step": 521 + }, + { + "epoch": 1.7371048252911814, + "grad_norm": 14.18319320678711, + "learning_rate": 5e-06, + "loss": 1.1421, + "num_input_tokens_seen": 32730852, + "step": 522 + }, + { + "epoch": 1.7371048252911814, + "loss": 1.0944100618362427, + "loss_ce": 4.9661710363579914e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 32730852, + "step": 522 + }, + { + "epoch": 1.740432612312812, + "grad_norm": 20.83157730102539, + "learning_rate": 5e-06, + "loss": 1.2773, + "num_input_tokens_seen": 32795496, + "step": 523 + }, + { + "epoch": 1.740432612312812, + "loss": 1.1278488636016846, + "loss_ce": 0.004802019335329533, + "loss_iou": 0.421875, + "loss_num": 0.055908203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 32795496, + "step": 523 + }, + { + "epoch": 1.7437603993344426, + "grad_norm": 16.72551918029785, + "learning_rate": 5e-06, + "loss": 0.9721, + "num_input_tokens_seen": 32860204, + "step": 524 + }, + { + "epoch": 1.7437603993344426, + "loss": 1.024550437927246, + "loss_ce": 0.0004415763833094388, + "loss_iou": 0.3671875, + "loss_num": 0.05810546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 32860204, + "step": 524 + }, + { + "epoch": 1.7470881863560732, + "grad_norm": 11.438901901245117, + "learning_rate": 5e-06, + "loss": 0.8762, + "num_input_tokens_seen": 32923856, + "step": 525 + }, + { + "epoch": 1.7470881863560732, + "loss": 1.0130120515823364, + "loss_ce": 0.001415381790138781, + "loss_iou": 0.365234375, + "loss_num": 0.0556640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 32923856, + "step": 525 + }, + { + "epoch": 1.7504159733777038, + "grad_norm": 8.21237850189209, + "learning_rate": 5e-06, + "loss": 0.7951, + "num_input_tokens_seen": 32985220, + "step": 526 + }, + { + "epoch": 1.7504159733777038, + "loss": 0.9805971384048462, + "loss_ce": 0.00037258450174704194, + "loss_iou": 0.31640625, + "loss_num": 0.0693359375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 32985220, + "step": 526 + }, + { + "epoch": 1.7537437603993344, + "grad_norm": 9.287155151367188, + "learning_rate": 5e-06, + "loss": 0.9109, + "num_input_tokens_seen": 33048680, + "step": 527 + }, + { + "epoch": 1.7537437603993344, + "loss": 0.9192065596580505, + "loss_ce": 0.0012378038372844458, + "loss_iou": 0.322265625, + "loss_num": 0.0546875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 33048680, + "step": 527 + }, + { + "epoch": 1.757071547420965, + "grad_norm": 46.055049896240234, + "learning_rate": 5e-06, + "loss": 1.0704, + "num_input_tokens_seen": 33112220, + "step": 528 + }, + { + "epoch": 1.757071547420965, + "loss": 1.060153841972351, + "loss_ce": 0.0010718015255406499, + "loss_iou": 0.34765625, + "loss_num": 0.07275390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 33112220, + "step": 528 + }, + { + "epoch": 1.7603993344425957, + "grad_norm": 27.12957000732422, + "learning_rate": 5e-06, + "loss": 0.865, + "num_input_tokens_seen": 33172044, + "step": 529 + }, + { + "epoch": 1.7603993344425957, + "loss": 0.9046726226806641, + "loss_ce": 0.0001316153648076579, + "loss_iou": 0.26171875, + "loss_num": 0.076171875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 33172044, + "step": 529 + }, + { + "epoch": 1.7637271214642263, + "grad_norm": 14.900341987609863, + "learning_rate": 5e-06, + "loss": 0.811, + "num_input_tokens_seen": 33234976, + "step": 530 + }, + { + "epoch": 1.7637271214642263, + "loss": 0.8328965306282043, + "loss_ce": 0.00037697955849580467, + "loss_iou": 0.294921875, + "loss_num": 0.04833984375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 33234976, + "step": 530 + }, + { + "epoch": 1.767054908485857, + "grad_norm": 19.717370986938477, + "learning_rate": 5e-06, + "loss": 1.0421, + "num_input_tokens_seen": 33298716, + "step": 531 + }, + { + "epoch": 1.767054908485857, + "loss": 1.0288456678390503, + "loss_ce": 0.0005254416028037667, + "loss_iou": 0.35546875, + "loss_num": 0.0634765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 33298716, + "step": 531 + }, + { + "epoch": 1.7703826955074875, + "grad_norm": 14.97089672088623, + "learning_rate": 5e-06, + "loss": 0.7139, + "num_input_tokens_seen": 33359296, + "step": 532 + }, + { + "epoch": 1.7703826955074875, + "loss": 0.6194471716880798, + "loss_ce": 0.00262587983161211, + "loss_iou": 0.220703125, + "loss_num": 0.03515625, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 33359296, + "step": 532 + }, + { + "epoch": 1.7737104825291181, + "grad_norm": 10.585949897766113, + "learning_rate": 5e-06, + "loss": 0.893, + "num_input_tokens_seen": 33420788, + "step": 533 + }, + { + "epoch": 1.7737104825291181, + "loss": 0.8950399160385132, + "loss_ce": 0.0002645118802320212, + "loss_iou": 0.3125, + "loss_num": 0.053955078125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 33420788, + "step": 533 + }, + { + "epoch": 1.7770382695507487, + "grad_norm": 10.35478687286377, + "learning_rate": 5e-06, + "loss": 1.0367, + "num_input_tokens_seen": 33484684, + "step": 534 + }, + { + "epoch": 1.7770382695507487, + "loss": 1.3992263078689575, + "loss_ce": 5.641989628202282e-05, + "loss_iou": 0.427734375, + "loss_num": 0.1083984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 33484684, + "step": 534 + }, + { + "epoch": 1.7803660565723793, + "grad_norm": 13.746817588806152, + "learning_rate": 5e-06, + "loss": 0.9671, + "num_input_tokens_seen": 33547376, + "step": 535 + }, + { + "epoch": 1.7803660565723793, + "loss": 1.0142176151275635, + "loss_ce": 5.745934322476387e-05, + "loss_iou": 0.349609375, + "loss_num": 0.06298828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 33547376, + "step": 535 + }, + { + "epoch": 1.78369384359401, + "grad_norm": 16.677959442138672, + "learning_rate": 5e-06, + "loss": 1.0859, + "num_input_tokens_seen": 33610036, + "step": 536 + }, + { + "epoch": 1.78369384359401, + "loss": 1.0913352966308594, + "loss_ce": 0.00027076914557255805, + "loss_iou": 0.328125, + "loss_num": 0.08642578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 33610036, + "step": 536 + }, + { + "epoch": 1.7870216306156406, + "grad_norm": 10.253283500671387, + "learning_rate": 5e-06, + "loss": 0.7858, + "num_input_tokens_seen": 33672984, + "step": 537 + }, + { + "epoch": 1.7870216306156406, + "loss": 0.7678014636039734, + "loss_ce": 0.0027867835015058517, + "loss_iou": 0.26171875, + "loss_num": 0.04833984375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 33672984, + "step": 537 + }, + { + "epoch": 1.7903494176372712, + "grad_norm": 35.10622024536133, + "learning_rate": 5e-06, + "loss": 1.0201, + "num_input_tokens_seen": 33736132, + "step": 538 + }, + { + "epoch": 1.7903494176372712, + "loss": 1.0910940170288086, + "loss_ce": 0.0012502449098974466, + "loss_iou": 0.3125, + "loss_num": 0.0927734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 33736132, + "step": 538 + }, + { + "epoch": 1.7936772046589018, + "grad_norm": 12.398226737976074, + "learning_rate": 5e-06, + "loss": 0.8778, + "num_input_tokens_seen": 33799380, + "step": 539 + }, + { + "epoch": 1.7936772046589018, + "loss": 0.7171542644500732, + "loss_ce": 0.0008456680225208402, + "loss_iou": 0.251953125, + "loss_num": 0.042724609375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 33799380, + "step": 539 + }, + { + "epoch": 1.7970049916805324, + "grad_norm": 14.9580659866333, + "learning_rate": 5e-06, + "loss": 0.7666, + "num_input_tokens_seen": 33862656, + "step": 540 + }, + { + "epoch": 1.7970049916805324, + "loss": 0.9555954933166504, + "loss_ce": 0.0010056574828922749, + "loss_iou": 0.302734375, + "loss_num": 0.0693359375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 33862656, + "step": 540 + }, + { + "epoch": 1.800332778702163, + "grad_norm": 20.136131286621094, + "learning_rate": 5e-06, + "loss": 0.7634, + "num_input_tokens_seen": 33925932, + "step": 541 + }, + { + "epoch": 1.800332778702163, + "loss": 0.685050368309021, + "loss_ce": 0.004142177756875753, + "loss_iou": 0.240234375, + "loss_num": 0.0400390625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 33925932, + "step": 541 + }, + { + "epoch": 1.8036605657237936, + "grad_norm": 26.61849021911621, + "learning_rate": 5e-06, + "loss": 1.1369, + "num_input_tokens_seen": 33989324, + "step": 542 + }, + { + "epoch": 1.8036605657237936, + "loss": 1.1945197582244873, + "loss_ce": 0.0018927238415926695, + "loss_iou": 0.3359375, + "loss_num": 0.10400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 33989324, + "step": 542 + }, + { + "epoch": 1.8069883527454242, + "grad_norm": 12.676218032836914, + "learning_rate": 5e-06, + "loss": 1.0277, + "num_input_tokens_seen": 34054000, + "step": 543 + }, + { + "epoch": 1.8069883527454242, + "loss": 0.9486079216003418, + "loss_ce": 0.0018306173151358962, + "loss_iou": 0.34765625, + "loss_num": 0.050048828125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 34054000, + "step": 543 + }, + { + "epoch": 1.8103161397670549, + "grad_norm": 14.024618148803711, + "learning_rate": 5e-06, + "loss": 1.0651, + "num_input_tokens_seen": 34118096, + "step": 544 + }, + { + "epoch": 1.8103161397670549, + "loss": 1.2064943313598633, + "loss_ce": 0.0009278804645873606, + "loss_iou": 0.3828125, + "loss_num": 0.08837890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 34118096, + "step": 544 + }, + { + "epoch": 1.8136439267886857, + "grad_norm": 15.398441314697266, + "learning_rate": 5e-06, + "loss": 0.8867, + "num_input_tokens_seen": 34181016, + "step": 545 + }, + { + "epoch": 1.8136439267886857, + "loss": 0.6198215484619141, + "loss_ce": 0.00019265212176833302, + "loss_iou": 0.2021484375, + "loss_num": 0.043212890625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 34181016, + "step": 545 + }, + { + "epoch": 1.8169717138103163, + "grad_norm": 18.64105987548828, + "learning_rate": 5e-06, + "loss": 1.0866, + "num_input_tokens_seen": 34242020, + "step": 546 + }, + { + "epoch": 1.8169717138103163, + "loss": 0.9077228903770447, + "loss_ce": 0.0011066340375691652, + "loss_iou": 0.283203125, + "loss_num": 0.0673828125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 34242020, + "step": 546 + }, + { + "epoch": 1.820299500831947, + "grad_norm": 13.642433166503906, + "learning_rate": 5e-06, + "loss": 0.9115, + "num_input_tokens_seen": 34305132, + "step": 547 + }, + { + "epoch": 1.820299500831947, + "loss": 0.6709655523300171, + "loss_ce": 0.0017760555492714047, + "loss_iou": 0.203125, + "loss_num": 0.052734375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 34305132, + "step": 547 + }, + { + "epoch": 1.8236272878535775, + "grad_norm": 37.51459503173828, + "learning_rate": 5e-06, + "loss": 1.0159, + "num_input_tokens_seen": 34367900, + "step": 548 + }, + { + "epoch": 1.8236272878535775, + "loss": 1.0078930854797363, + "loss_ce": 0.0005688223754987121, + "loss_iou": 0.37890625, + "loss_num": 0.050048828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 34367900, + "step": 548 + }, + { + "epoch": 1.8269550748752081, + "grad_norm": 27.26789093017578, + "learning_rate": 5e-06, + "loss": 1.1389, + "num_input_tokens_seen": 34429256, + "step": 549 + }, + { + "epoch": 1.8269550748752081, + "loss": 1.143343448638916, + "loss_ce": 0.001985959243029356, + "loss_iou": 0.32421875, + "loss_num": 0.09814453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 34429256, + "step": 549 + }, + { + "epoch": 1.8302828618968388, + "grad_norm": 14.803655624389648, + "learning_rate": 5e-06, + "loss": 1.3364, + "num_input_tokens_seen": 34492152, + "step": 550 + }, + { + "epoch": 1.8302828618968388, + "loss": 1.4599002599716187, + "loss_ce": 0.0004276781983207911, + "loss_iou": 0.412109375, + "loss_num": 0.126953125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 34492152, + "step": 550 + }, + { + "epoch": 1.8336106489184694, + "grad_norm": 26.294727325439453, + "learning_rate": 5e-06, + "loss": 1.1966, + "num_input_tokens_seen": 34555988, + "step": 551 + }, + { + "epoch": 1.8336106489184694, + "loss": 1.2220561504364014, + "loss_ce": 0.002573746722191572, + "loss_iou": 0.404296875, + "loss_num": 0.08203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 34555988, + "step": 551 + }, + { + "epoch": 1.8369384359401, + "grad_norm": 14.449104309082031, + "learning_rate": 5e-06, + "loss": 0.9287, + "num_input_tokens_seen": 34619232, + "step": 552 + }, + { + "epoch": 1.8369384359401, + "loss": 0.9966768026351929, + "loss_ce": 0.0013155062915757298, + "loss_iou": 0.3671875, + "loss_num": 0.052490234375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 34619232, + "step": 552 + }, + { + "epoch": 1.8402662229617306, + "grad_norm": 23.511919021606445, + "learning_rate": 5e-06, + "loss": 1.0271, + "num_input_tokens_seen": 34682704, + "step": 553 + }, + { + "epoch": 1.8402662229617306, + "loss": 1.2029976844787598, + "loss_ce": 0.0035348027013242245, + "loss_iou": 0.4765625, + "loss_num": 0.049072265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 34682704, + "step": 553 + }, + { + "epoch": 1.8435940099833612, + "grad_norm": 12.143444061279297, + "learning_rate": 5e-06, + "loss": 0.8477, + "num_input_tokens_seen": 34745964, + "step": 554 + }, + { + "epoch": 1.8435940099833612, + "loss": 1.0100423097610474, + "loss_ce": 0.00012407411122694612, + "loss_iou": 0.37109375, + "loss_num": 0.0537109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 34745964, + "step": 554 + }, + { + "epoch": 1.8469217970049918, + "grad_norm": 26.108985900878906, + "learning_rate": 5e-06, + "loss": 0.9329, + "num_input_tokens_seen": 34808712, + "step": 555 + }, + { + "epoch": 1.8469217970049918, + "loss": 0.9639697074890137, + "loss_ce": 0.0013231942430138588, + "loss_iou": 0.326171875, + "loss_num": 0.06201171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 34808712, + "step": 555 + }, + { + "epoch": 1.8502495840266224, + "grad_norm": 18.90593147277832, + "learning_rate": 5e-06, + "loss": 1.0983, + "num_input_tokens_seen": 34871224, + "step": 556 + }, + { + "epoch": 1.8502495840266224, + "loss": 1.1608165502548218, + "loss_ce": 0.0004161183023825288, + "loss_iou": 0.4375, + "loss_num": 0.056884765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 34871224, + "step": 556 + }, + { + "epoch": 1.853577371048253, + "grad_norm": 19.406909942626953, + "learning_rate": 5e-06, + "loss": 1.0324, + "num_input_tokens_seen": 34931504, + "step": 557 + }, + { + "epoch": 1.853577371048253, + "loss": 1.2264602184295654, + "loss_ce": 0.0003859272401314229, + "loss_iou": 0.421875, + "loss_num": 0.07666015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 34931504, + "step": 557 + }, + { + "epoch": 1.8569051580698837, + "grad_norm": 12.974056243896484, + "learning_rate": 5e-06, + "loss": 0.7937, + "num_input_tokens_seen": 34994100, + "step": 558 + }, + { + "epoch": 1.8569051580698837, + "loss": 0.8865928649902344, + "loss_ce": 0.0032920846715569496, + "loss_iou": 0.283203125, + "loss_num": 0.06298828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 34994100, + "step": 558 + }, + { + "epoch": 1.8602329450915143, + "grad_norm": 13.932268142700195, + "learning_rate": 5e-06, + "loss": 1.1015, + "num_input_tokens_seen": 35056884, + "step": 559 + }, + { + "epoch": 1.8602329450915143, + "loss": 1.1955981254577637, + "loss_ce": 0.0007738770800642669, + "loss_iou": 0.435546875, + "loss_num": 0.064453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 35056884, + "step": 559 + }, + { + "epoch": 1.8635607321131449, + "grad_norm": 14.666391372680664, + "learning_rate": 5e-06, + "loss": 0.8643, + "num_input_tokens_seen": 35120340, + "step": 560 + }, + { + "epoch": 1.8635607321131449, + "loss": 0.886032223701477, + "loss_ce": 0.0012666526017710567, + "loss_iou": 0.283203125, + "loss_num": 0.0634765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 35120340, + "step": 560 + }, + { + "epoch": 1.8668885191347755, + "grad_norm": 13.323609352111816, + "learning_rate": 5e-06, + "loss": 1.1881, + "num_input_tokens_seen": 35183468, + "step": 561 + }, + { + "epoch": 1.8668885191347755, + "loss": 1.152817964553833, + "loss_ce": 0.0019389993976801634, + "loss_iou": 0.36328125, + "loss_num": 0.08544921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 35183468, + "step": 561 + }, + { + "epoch": 1.870216306156406, + "grad_norm": 9.693658828735352, + "learning_rate": 5e-06, + "loss": 1.1869, + "num_input_tokens_seen": 35247208, + "step": 562 + }, + { + "epoch": 1.870216306156406, + "loss": 1.3116389513015747, + "loss_ce": 0.0006038228166289628, + "loss_iou": 0.44921875, + "loss_num": 0.08251953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 35247208, + "step": 562 + }, + { + "epoch": 1.8735440931780367, + "grad_norm": 29.306072235107422, + "learning_rate": 5e-06, + "loss": 1.1472, + "num_input_tokens_seen": 35310828, + "step": 563 + }, + { + "epoch": 1.8735440931780367, + "loss": 1.2778418064117432, + "loss_ce": 0.0068457284942269325, + "loss_iou": 0.44140625, + "loss_num": 0.078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 35310828, + "step": 563 + }, + { + "epoch": 1.8768718801996673, + "grad_norm": 27.0247745513916, + "learning_rate": 5e-06, + "loss": 0.9282, + "num_input_tokens_seen": 35373164, + "step": 564 + }, + { + "epoch": 1.8768718801996673, + "loss": 0.9917428493499756, + "loss_ce": 0.00028781441505998373, + "loss_iou": 0.345703125, + "loss_num": 0.059814453125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 35373164, + "step": 564 + }, + { + "epoch": 1.880199667221298, + "grad_norm": 14.91286563873291, + "learning_rate": 5e-06, + "loss": 0.9329, + "num_input_tokens_seen": 35435044, + "step": 565 + }, + { + "epoch": 1.880199667221298, + "loss": 1.015300989151001, + "loss_ce": 0.00016425539797637612, + "loss_iou": 0.310546875, + "loss_num": 0.07958984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 35435044, + "step": 565 + }, + { + "epoch": 1.8835274542429286, + "grad_norm": 10.378997802734375, + "learning_rate": 5e-06, + "loss": 0.9905, + "num_input_tokens_seen": 35497864, + "step": 566 + }, + { + "epoch": 1.8835274542429286, + "loss": 1.1247049570083618, + "loss_ce": 0.0009256677003577352, + "loss_iou": 0.349609375, + "loss_num": 0.0849609375, + "loss_xval": 1.125, + "num_input_tokens_seen": 35497864, + "step": 566 + }, + { + "epoch": 1.8868552412645592, + "grad_norm": 17.83684539794922, + "learning_rate": 5e-06, + "loss": 0.929, + "num_input_tokens_seen": 35560536, + "step": 567 + }, + { + "epoch": 1.8868552412645592, + "loss": 0.6144291162490845, + "loss_ce": 0.00017130836204160005, + "loss_iou": 0.1376953125, + "loss_num": 0.06787109375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 35560536, + "step": 567 + }, + { + "epoch": 1.8901830282861898, + "grad_norm": 49.60697937011719, + "learning_rate": 5e-06, + "loss": 0.9559, + "num_input_tokens_seen": 35622356, + "step": 568 + }, + { + "epoch": 1.8901830282861898, + "loss": 1.1087956428527832, + "loss_ce": 0.0001530621957499534, + "loss_iou": 0.2392578125, + "loss_num": 0.1259765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 35622356, + "step": 568 + }, + { + "epoch": 1.8935108153078204, + "grad_norm": 14.60826301574707, + "learning_rate": 5e-06, + "loss": 1.121, + "num_input_tokens_seen": 35685972, + "step": 569 + }, + { + "epoch": 1.8935108153078204, + "loss": 1.326939582824707, + "loss_ce": 0.0022325818426907063, + "loss_iou": 0.380859375, + "loss_num": 0.11279296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 35685972, + "step": 569 + }, + { + "epoch": 1.896838602329451, + "grad_norm": 25.41444969177246, + "learning_rate": 5e-06, + "loss": 0.8218, + "num_input_tokens_seen": 35747280, + "step": 570 + }, + { + "epoch": 1.896838602329451, + "loss": 0.8077418804168701, + "loss_ce": 0.0006130048423074186, + "loss_iou": 0.2216796875, + "loss_num": 0.07275390625, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 35747280, + "step": 570 + }, + { + "epoch": 1.9001663893510816, + "grad_norm": 24.91752052307129, + "learning_rate": 5e-06, + "loss": 1.1637, + "num_input_tokens_seen": 35811528, + "step": 571 + }, + { + "epoch": 1.9001663893510816, + "loss": 1.3153760433197021, + "loss_ce": 0.0033644186332821846, + "loss_iou": 0.486328125, + "loss_num": 0.06787109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 35811528, + "step": 571 + }, + { + "epoch": 1.9034941763727122, + "grad_norm": 25.198698043823242, + "learning_rate": 5e-06, + "loss": 1.0152, + "num_input_tokens_seen": 35874204, + "step": 572 + }, + { + "epoch": 1.9034941763727122, + "loss": 1.3468836545944214, + "loss_ce": 0.0002039041864918545, + "loss_iou": 0.4453125, + "loss_num": 0.09130859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 35874204, + "step": 572 + }, + { + "epoch": 1.9068219633943428, + "grad_norm": 17.966543197631836, + "learning_rate": 5e-06, + "loss": 0.8496, + "num_input_tokens_seen": 35936360, + "step": 573 + }, + { + "epoch": 1.9068219633943428, + "loss": 1.1128754615783691, + "loss_ce": 0.00032671194639988244, + "loss_iou": 0.376953125, + "loss_num": 0.0712890625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 35936360, + "step": 573 + }, + { + "epoch": 1.9101497504159735, + "grad_norm": 22.120344161987305, + "learning_rate": 5e-06, + "loss": 1.0125, + "num_input_tokens_seen": 35998784, + "step": 574 + }, + { + "epoch": 1.9101497504159735, + "loss": 1.294684648513794, + "loss_ce": 0.0002511721686460078, + "loss_iou": 0.416015625, + "loss_num": 0.09228515625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 35998784, + "step": 574 + }, + { + "epoch": 1.913477537437604, + "grad_norm": 14.170149803161621, + "learning_rate": 5e-06, + "loss": 0.9052, + "num_input_tokens_seen": 36061612, + "step": 575 + }, + { + "epoch": 1.913477537437604, + "loss": 0.7781501412391663, + "loss_ce": 0.0025154012255370617, + "loss_iou": 0.2392578125, + "loss_num": 0.05908203125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 36061612, + "step": 575 + }, + { + "epoch": 1.9168053244592347, + "grad_norm": 38.99104309082031, + "learning_rate": 5e-06, + "loss": 1.1207, + "num_input_tokens_seen": 36124752, + "step": 576 + }, + { + "epoch": 1.9168053244592347, + "loss": 1.3191872835159302, + "loss_ce": 0.0008279454777948558, + "loss_iou": 0.443359375, + "loss_num": 0.0859375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 36124752, + "step": 576 + }, + { + "epoch": 1.9201331114808653, + "grad_norm": 18.79828453063965, + "learning_rate": 5e-06, + "loss": 1.0237, + "num_input_tokens_seen": 36185564, + "step": 577 + }, + { + "epoch": 1.9201331114808653, + "loss": 1.0448555946350098, + "loss_ce": 0.0010322753805667162, + "loss_iou": 0.310546875, + "loss_num": 0.08447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 36185564, + "step": 577 + }, + { + "epoch": 1.923460898502496, + "grad_norm": 8.662841796875, + "learning_rate": 5e-06, + "loss": 0.8287, + "num_input_tokens_seen": 36248000, + "step": 578 + }, + { + "epoch": 1.923460898502496, + "loss": 0.7125368118286133, + "loss_ce": 0.00013448702520690858, + "loss_iou": 0.2470703125, + "loss_num": 0.043212890625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 36248000, + "step": 578 + }, + { + "epoch": 1.9267886855241265, + "grad_norm": 24.12902069091797, + "learning_rate": 5e-06, + "loss": 0.9049, + "num_input_tokens_seen": 36310656, + "step": 579 + }, + { + "epoch": 1.9267886855241265, + "loss": 0.8042501211166382, + "loss_ce": 0.0005391854792833328, + "loss_iou": 0.2138671875, + "loss_num": 0.0751953125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 36310656, + "step": 579 + }, + { + "epoch": 1.9301164725457571, + "grad_norm": 14.56750202178955, + "learning_rate": 5e-06, + "loss": 0.8137, + "num_input_tokens_seen": 36373116, + "step": 580 + }, + { + "epoch": 1.9301164725457571, + "loss": 0.6355822086334229, + "loss_ce": 0.003990426659584045, + "loss_iou": 0.19921875, + "loss_num": 0.04638671875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 36373116, + "step": 580 + }, + { + "epoch": 1.9334442595673877, + "grad_norm": 17.040950775146484, + "learning_rate": 5e-06, + "loss": 0.9652, + "num_input_tokens_seen": 36434072, + "step": 581 + }, + { + "epoch": 1.9334442595673877, + "loss": 1.1782926321029663, + "loss_ce": 0.004586563445627689, + "loss_iou": 0.33984375, + "loss_num": 0.0986328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 36434072, + "step": 581 + }, + { + "epoch": 1.9367720465890184, + "grad_norm": 10.302577018737793, + "learning_rate": 5e-06, + "loss": 0.7532, + "num_input_tokens_seen": 36496096, + "step": 582 + }, + { + "epoch": 1.9367720465890184, + "loss": 0.8137335777282715, + "loss_ce": 0.0005012074834667146, + "loss_iou": 0.291015625, + "loss_num": 0.046142578125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 36496096, + "step": 582 + }, + { + "epoch": 1.940099833610649, + "grad_norm": 15.283492088317871, + "learning_rate": 5e-06, + "loss": 0.9993, + "num_input_tokens_seen": 36558260, + "step": 583 + }, + { + "epoch": 1.940099833610649, + "loss": 0.8162379860877991, + "loss_ce": 0.00044212467037141323, + "loss_iou": 0.26953125, + "loss_num": 0.055419921875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 36558260, + "step": 583 + }, + { + "epoch": 1.9434276206322796, + "grad_norm": 10.120014190673828, + "learning_rate": 5e-06, + "loss": 0.6606, + "num_input_tokens_seen": 36620356, + "step": 584 + }, + { + "epoch": 1.9434276206322796, + "loss": 0.6671352386474609, + "loss_ce": 0.00014302069030236453, + "loss_iou": 0.236328125, + "loss_num": 0.039306640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 36620356, + "step": 584 + }, + { + "epoch": 1.9467554076539102, + "grad_norm": 10.893766403198242, + "learning_rate": 5e-06, + "loss": 0.9976, + "num_input_tokens_seen": 36683052, + "step": 585 + }, + { + "epoch": 1.9467554076539102, + "loss": 0.9768415689468384, + "loss_ce": 0.0027204775251448154, + "loss_iou": 0.30859375, + "loss_num": 0.0712890625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 36683052, + "step": 585 + }, + { + "epoch": 1.9500831946755408, + "grad_norm": 22.243104934692383, + "learning_rate": 5e-06, + "loss": 1.0094, + "num_input_tokens_seen": 36745780, + "step": 586 + }, + { + "epoch": 1.9500831946755408, + "loss": 0.975989043712616, + "loss_ce": 0.0006472546374425292, + "loss_iou": 0.314453125, + "loss_num": 0.06884765625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 36745780, + "step": 586 + }, + { + "epoch": 1.9534109816971714, + "grad_norm": 17.508132934570312, + "learning_rate": 5e-06, + "loss": 1.1263, + "num_input_tokens_seen": 36809268, + "step": 587 + }, + { + "epoch": 1.9534109816971714, + "loss": 0.9999065399169922, + "loss_ce": 0.0018596657318994403, + "loss_iou": 0.27734375, + "loss_num": 0.08837890625, + "loss_xval": 1.0, + "num_input_tokens_seen": 36809268, + "step": 587 + }, + { + "epoch": 1.956738768718802, + "grad_norm": 13.245006561279297, + "learning_rate": 5e-06, + "loss": 0.8024, + "num_input_tokens_seen": 36873044, + "step": 588 + }, + { + "epoch": 1.956738768718802, + "loss": 0.6673635244369507, + "loss_ce": 0.00024929430219344795, + "loss_iou": 0.2734375, + "loss_num": 0.0240478515625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 36873044, + "step": 588 + }, + { + "epoch": 1.9600665557404326, + "grad_norm": 9.61874008178711, + "learning_rate": 5e-06, + "loss": 1.037, + "num_input_tokens_seen": 36935692, + "step": 589 + }, + { + "epoch": 1.9600665557404326, + "loss": 0.9211355447769165, + "loss_ce": 0.0014577455585822463, + "loss_iou": 0.3203125, + "loss_num": 0.05615234375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 36935692, + "step": 589 + }, + { + "epoch": 1.9633943427620633, + "grad_norm": 10.554235458374023, + "learning_rate": 5e-06, + "loss": 0.9399, + "num_input_tokens_seen": 36997520, + "step": 590 + }, + { + "epoch": 1.9633943427620633, + "loss": 1.0528907775878906, + "loss_ce": 0.0004004701040685177, + "loss_iou": 0.3515625, + "loss_num": 0.0703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 36997520, + "step": 590 + }, + { + "epoch": 1.9667221297836939, + "grad_norm": 17.416133880615234, + "learning_rate": 5e-06, + "loss": 0.7433, + "num_input_tokens_seen": 37060460, + "step": 591 + }, + { + "epoch": 1.9667221297836939, + "loss": 0.858521044254303, + "loss_ce": 0.0008550334605388343, + "loss_iou": 0.25390625, + "loss_num": 0.0703125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 37060460, + "step": 591 + }, + { + "epoch": 1.9700499168053245, + "grad_norm": 14.890344619750977, + "learning_rate": 5e-06, + "loss": 0.9326, + "num_input_tokens_seen": 37123204, + "step": 592 + }, + { + "epoch": 1.9700499168053245, + "loss": 0.8313470482826233, + "loss_ce": 0.013231809251010418, + "loss_iou": 0.240234375, + "loss_num": 0.06787109375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 37123204, + "step": 592 + }, + { + "epoch": 1.973377703826955, + "grad_norm": 14.116703987121582, + "learning_rate": 5e-06, + "loss": 1.1915, + "num_input_tokens_seen": 37185468, + "step": 593 + }, + { + "epoch": 1.973377703826955, + "loss": 1.041740894317627, + "loss_ce": 0.000511517224367708, + "loss_iou": 0.38671875, + "loss_num": 0.053466796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 37185468, + "step": 593 + }, + { + "epoch": 1.9767054908485857, + "grad_norm": 10.910758018493652, + "learning_rate": 5e-06, + "loss": 0.7212, + "num_input_tokens_seen": 37245908, + "step": 594 + }, + { + "epoch": 1.9767054908485857, + "loss": 0.6745211482048035, + "loss_ce": 0.001547462772578001, + "loss_iou": 0.18359375, + "loss_num": 0.060791015625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 37245908, + "step": 594 + }, + { + "epoch": 1.9800332778702163, + "grad_norm": 11.645059585571289, + "learning_rate": 5e-06, + "loss": 0.9004, + "num_input_tokens_seen": 37310236, + "step": 595 + }, + { + "epoch": 1.9800332778702163, + "loss": 0.8556256294250488, + "loss_ce": 0.006992822512984276, + "loss_iou": 0.31640625, + "loss_num": 0.043212890625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 37310236, + "step": 595 + }, + { + "epoch": 1.983361064891847, + "grad_norm": 12.184340476989746, + "learning_rate": 5e-06, + "loss": 1.0324, + "num_input_tokens_seen": 37371568, + "step": 596 + }, + { + "epoch": 1.983361064891847, + "loss": 1.0166329145431519, + "loss_ce": 0.0027778963558375835, + "loss_iou": 0.33203125, + "loss_num": 0.0703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 37371568, + "step": 596 + }, + { + "epoch": 1.9866888519134775, + "grad_norm": 10.037638664245605, + "learning_rate": 5e-06, + "loss": 0.6938, + "num_input_tokens_seen": 37434188, + "step": 597 + }, + { + "epoch": 1.9866888519134775, + "loss": 0.8472949266433716, + "loss_ce": 0.0015917927958071232, + "loss_iou": 0.2197265625, + "loss_num": 0.08154296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 37434188, + "step": 597 + }, + { + "epoch": 1.9900166389351082, + "grad_norm": 28.653364181518555, + "learning_rate": 5e-06, + "loss": 1.3632, + "num_input_tokens_seen": 37497236, + "step": 598 + }, + { + "epoch": 1.9900166389351082, + "loss": 1.1080653667449951, + "loss_ce": 0.00015516526764258742, + "loss_iou": 0.369140625, + "loss_num": 0.07421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 37497236, + "step": 598 + }, + { + "epoch": 1.9933444259567388, + "grad_norm": 63.16008377075195, + "learning_rate": 5e-06, + "loss": 0.984, + "num_input_tokens_seen": 37559724, + "step": 599 + }, + { + "epoch": 1.9933444259567388, + "loss": 1.0816211700439453, + "loss_ce": 0.00044438272016122937, + "loss_iou": 0.359375, + "loss_num": 0.072265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 37559724, + "step": 599 + }, + { + "epoch": 1.9966722129783694, + "grad_norm": 12.790926933288574, + "learning_rate": 5e-06, + "loss": 1.2626, + "num_input_tokens_seen": 37623232, + "step": 600 + }, + { + "epoch": 1.9966722129783694, + "loss": 1.700850009918213, + "loss_ce": 0.0013871309347450733, + "loss_iou": 0.5078125, + "loss_num": 0.13671875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 37623232, + "step": 600 + }, + { + "epoch": 2.0, + "grad_norm": 17.36669158935547, + "learning_rate": 5e-06, + "loss": 0.7352, + "num_input_tokens_seen": 37686136, + "step": 601 + }, + { + "epoch": 2.0, + "loss": 0.7832919359207153, + "loss_ce": 0.0008212359971366823, + "loss_iou": 0.205078125, + "loss_num": 0.07470703125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 37686136, + "step": 601 + }, + { + "epoch": 2.0033277870216306, + "grad_norm": 18.40623664855957, + "learning_rate": 5e-06, + "loss": 1.1938, + "num_input_tokens_seen": 37749472, + "step": 602 + }, + { + "epoch": 2.0033277870216306, + "loss": 1.2827110290527344, + "loss_ce": 0.00048443087143823504, + "loss_iou": 0.447265625, + "loss_num": 0.07763671875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 37749472, + "step": 602 + }, + { + "epoch": 2.0066555740432612, + "grad_norm": 12.205756187438965, + "learning_rate": 5e-06, + "loss": 0.5865, + "num_input_tokens_seen": 37811236, + "step": 603 + }, + { + "epoch": 2.0066555740432612, + "loss": 0.42705678939819336, + "loss_ce": 0.00029895632178522646, + "loss_iou": 0.0, + "loss_num": 0.08544921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 37811236, + "step": 603 + }, + { + "epoch": 2.009983361064892, + "grad_norm": 10.160212516784668, + "learning_rate": 5e-06, + "loss": 0.5894, + "num_input_tokens_seen": 37872200, + "step": 604 + }, + { + "epoch": 2.009983361064892, + "loss": 0.7009243965148926, + "loss_ce": 0.00024079754075501114, + "loss_iou": 0.1689453125, + "loss_num": 0.07275390625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 37872200, + "step": 604 + }, + { + "epoch": 2.0133111480865225, + "grad_norm": 13.080687522888184, + "learning_rate": 5e-06, + "loss": 0.7835, + "num_input_tokens_seen": 37933716, + "step": 605 + }, + { + "epoch": 2.0133111480865225, + "loss": 1.0986953973770142, + "loss_ce": 6.257939094211906e-05, + "loss_iou": 0.3828125, + "loss_num": 0.06640625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 37933716, + "step": 605 + }, + { + "epoch": 2.016638935108153, + "grad_norm": 10.649163246154785, + "learning_rate": 5e-06, + "loss": 0.7663, + "num_input_tokens_seen": 37994536, + "step": 606 + }, + { + "epoch": 2.016638935108153, + "loss": 0.9843522310256958, + "loss_ce": 0.0007096048793755472, + "loss_iou": 0.275390625, + "loss_num": 0.0869140625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 37994536, + "step": 606 + }, + { + "epoch": 2.0199667221297837, + "grad_norm": 10.702330589294434, + "learning_rate": 5e-06, + "loss": 0.6246, + "num_input_tokens_seen": 38057508, + "step": 607 + }, + { + "epoch": 2.0199667221297837, + "loss": 0.6482642889022827, + "loss_ce": 0.0010474994778633118, + "loss_iou": 0.2197265625, + "loss_num": 0.041748046875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 38057508, + "step": 607 + }, + { + "epoch": 2.0232945091514143, + "grad_norm": 12.19349479675293, + "learning_rate": 5e-06, + "loss": 0.7891, + "num_input_tokens_seen": 38119664, + "step": 608 + }, + { + "epoch": 2.0232945091514143, + "loss": 0.8228746652603149, + "loss_ce": 0.0001207729583256878, + "loss_iou": 0.2470703125, + "loss_num": 0.06591796875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 38119664, + "step": 608 + }, + { + "epoch": 2.026622296173045, + "grad_norm": 30.340932846069336, + "learning_rate": 5e-06, + "loss": 1.0127, + "num_input_tokens_seen": 38184528, + "step": 609 + }, + { + "epoch": 2.026622296173045, + "loss": 1.135079026222229, + "loss_ce": 0.0008016878855414689, + "loss_iou": 0.45703125, + "loss_num": 0.0439453125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 38184528, + "step": 609 + }, + { + "epoch": 2.0299500831946755, + "grad_norm": 19.663864135742188, + "learning_rate": 5e-06, + "loss": 1.0414, + "num_input_tokens_seen": 38247428, + "step": 610 + }, + { + "epoch": 2.0299500831946755, + "loss": 1.2719979286193848, + "loss_ce": 0.00026946913567371666, + "loss_iou": 0.478515625, + "loss_num": 0.0634765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 38247428, + "step": 610 + }, + { + "epoch": 2.033277870216306, + "grad_norm": 38.78046798706055, + "learning_rate": 5e-06, + "loss": 0.6704, + "num_input_tokens_seen": 38309384, + "step": 611 + }, + { + "epoch": 2.033277870216306, + "loss": 0.9232205152511597, + "loss_ce": 0.0001247778272954747, + "loss_iou": 0.302734375, + "loss_num": 0.0634765625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 38309384, + "step": 611 + }, + { + "epoch": 2.0366056572379367, + "grad_norm": 21.593708038330078, + "learning_rate": 5e-06, + "loss": 0.8872, + "num_input_tokens_seen": 38373688, + "step": 612 + }, + { + "epoch": 2.0366056572379367, + "loss": 0.8260167837142944, + "loss_ce": 8.90689916559495e-05, + "loss_iou": 0.291015625, + "loss_num": 0.048828125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 38373688, + "step": 612 + }, + { + "epoch": 2.0399334442595674, + "grad_norm": 28.829166412353516, + "learning_rate": 5e-06, + "loss": 1.195, + "num_input_tokens_seen": 38435916, + "step": 613 + }, + { + "epoch": 2.0399334442595674, + "loss": 1.4014410972595215, + "loss_ce": 0.000562186527531594, + "loss_iou": 0.47265625, + "loss_num": 0.091796875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 38435916, + "step": 613 + }, + { + "epoch": 2.043261231281198, + "grad_norm": 30.802501678466797, + "learning_rate": 5e-06, + "loss": 0.928, + "num_input_tokens_seen": 38499540, + "step": 614 + }, + { + "epoch": 2.043261231281198, + "loss": 1.0729480981826782, + "loss_ce": 0.017283970490098, + "loss_iou": 0.33984375, + "loss_num": 0.0751953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 38499540, + "step": 614 + }, + { + "epoch": 2.0465890183028286, + "grad_norm": 14.069050788879395, + "learning_rate": 5e-06, + "loss": 0.9339, + "num_input_tokens_seen": 38563296, + "step": 615 + }, + { + "epoch": 2.0465890183028286, + "loss": 1.0433433055877686, + "loss_ce": 0.01477888599038124, + "loss_iou": 0.3984375, + "loss_num": 0.046630859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 38563296, + "step": 615 + }, + { + "epoch": 2.049916805324459, + "grad_norm": 8.753564834594727, + "learning_rate": 5e-06, + "loss": 0.71, + "num_input_tokens_seen": 38624788, + "step": 616 + }, + { + "epoch": 2.049916805324459, + "loss": 0.8744981288909912, + "loss_ce": 0.006578153930604458, + "loss_iou": 0.28125, + "loss_num": 0.061279296875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 38624788, + "step": 616 + }, + { + "epoch": 2.05324459234609, + "grad_norm": 21.490896224975586, + "learning_rate": 5e-06, + "loss": 1.1, + "num_input_tokens_seen": 38689684, + "step": 617 + }, + { + "epoch": 2.05324459234609, + "loss": 1.2144798040390015, + "loss_ce": 0.0028098488692194223, + "loss_iou": 0.4453125, + "loss_num": 0.064453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 38689684, + "step": 617 + }, + { + "epoch": 2.0565723793677204, + "grad_norm": 14.222006797790527, + "learning_rate": 5e-06, + "loss": 0.6892, + "num_input_tokens_seen": 38752120, + "step": 618 + }, + { + "epoch": 2.0565723793677204, + "loss": 0.7122447490692139, + "loss_ce": 0.0017955549992620945, + "loss_iou": 0.2021484375, + "loss_num": 0.0615234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 38752120, + "step": 618 + }, + { + "epoch": 2.059900166389351, + "grad_norm": 13.638813972473145, + "learning_rate": 5e-06, + "loss": 0.8149, + "num_input_tokens_seen": 38812068, + "step": 619 + }, + { + "epoch": 2.059900166389351, + "loss": 1.018181562423706, + "loss_ce": 0.00011516415543155745, + "loss_iou": 0.291015625, + "loss_num": 0.087890625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 38812068, + "step": 619 + }, + { + "epoch": 2.0632279534109816, + "grad_norm": 16.652631759643555, + "learning_rate": 5e-06, + "loss": 0.7474, + "num_input_tokens_seen": 38874664, + "step": 620 + }, + { + "epoch": 2.0632279534109816, + "loss": 0.42710453271865845, + "loss_ce": 0.00028570188442245126, + "loss_iou": 0.09228515625, + "loss_num": 0.048583984375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 38874664, + "step": 620 + }, + { + "epoch": 2.0665557404326123, + "grad_norm": 22.291889190673828, + "learning_rate": 5e-06, + "loss": 0.9719, + "num_input_tokens_seen": 38938248, + "step": 621 + }, + { + "epoch": 2.0665557404326123, + "loss": 0.9289200305938721, + "loss_ce": 0.00020914892957080156, + "loss_iou": 0.318359375, + "loss_num": 0.05810546875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 38938248, + "step": 621 + }, + { + "epoch": 2.069883527454243, + "grad_norm": 21.66976547241211, + "learning_rate": 5e-06, + "loss": 0.6807, + "num_input_tokens_seen": 39000808, + "step": 622 + }, + { + "epoch": 2.069883527454243, + "loss": 0.7833365201950073, + "loss_ce": 0.0003775203076656908, + "loss_iou": 0.267578125, + "loss_num": 0.0498046875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 39000808, + "step": 622 + }, + { + "epoch": 2.0732113144758735, + "grad_norm": 13.521284103393555, + "learning_rate": 5e-06, + "loss": 0.9122, + "num_input_tokens_seen": 39064556, + "step": 623 + }, + { + "epoch": 2.0732113144758735, + "loss": 0.8654112815856934, + "loss_ce": 0.0004210532642900944, + "loss_iou": 0.318359375, + "loss_num": 0.04541015625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 39064556, + "step": 623 + }, + { + "epoch": 2.076539101497504, + "grad_norm": 9.097267150878906, + "learning_rate": 5e-06, + "loss": 0.7518, + "num_input_tokens_seen": 39127724, + "step": 624 + }, + { + "epoch": 2.076539101497504, + "loss": 0.8574149012565613, + "loss_ce": 0.0007254519150592387, + "loss_iou": 0.2734375, + "loss_num": 0.0615234375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 39127724, + "step": 624 + }, + { + "epoch": 2.0798668885191347, + "grad_norm": 62.03600311279297, + "learning_rate": 5e-06, + "loss": 0.779, + "num_input_tokens_seen": 39191480, + "step": 625 + }, + { + "epoch": 2.0798668885191347, + "loss": 0.8742235898971558, + "loss_ce": 7.806658686604351e-05, + "loss_iou": 0.296875, + "loss_num": 0.05615234375, + "loss_xval": 0.875, + "num_input_tokens_seen": 39191480, + "step": 625 + }, + { + "epoch": 2.0831946755407653, + "grad_norm": 37.185428619384766, + "learning_rate": 5e-06, + "loss": 0.7275, + "num_input_tokens_seen": 39252676, + "step": 626 + }, + { + "epoch": 2.0831946755407653, + "loss": 0.7918392419815063, + "loss_ce": 0.0015560202300548553, + "loss_iou": 0.234375, + "loss_num": 0.064453125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 39252676, + "step": 626 + }, + { + "epoch": 2.086522462562396, + "grad_norm": 13.905028343200684, + "learning_rate": 5e-06, + "loss": 0.5983, + "num_input_tokens_seen": 39314092, + "step": 627 + }, + { + "epoch": 2.086522462562396, + "loss": 0.29983216524124146, + "loss_ce": 0.00045470561599358916, + "loss_iou": 0.0, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 39314092, + "step": 627 + }, + { + "epoch": 2.0898502495840265, + "grad_norm": 21.244510650634766, + "learning_rate": 5e-06, + "loss": 0.8911, + "num_input_tokens_seen": 39377644, + "step": 628 + }, + { + "epoch": 2.0898502495840265, + "loss": 0.9056867361068726, + "loss_ce": 0.00016913384024519473, + "loss_iou": 0.3359375, + "loss_num": 0.046630859375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 39377644, + "step": 628 + }, + { + "epoch": 2.093178036605657, + "grad_norm": 20.721181869506836, + "learning_rate": 5e-06, + "loss": 1.0455, + "num_input_tokens_seen": 39441688, + "step": 629 + }, + { + "epoch": 2.093178036605657, + "loss": 1.0174716711044312, + "loss_ce": 0.004288055002689362, + "loss_iou": 0.328125, + "loss_num": 0.07177734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 39441688, + "step": 629 + }, + { + "epoch": 2.0965058236272878, + "grad_norm": 12.86941909790039, + "learning_rate": 5e-06, + "loss": 0.7168, + "num_input_tokens_seen": 39503164, + "step": 630 + }, + { + "epoch": 2.0965058236272878, + "loss": 0.9291834831237793, + "loss_ce": 0.0034022473264485598, + "loss_iou": 0.330078125, + "loss_num": 0.05322265625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 39503164, + "step": 630 + }, + { + "epoch": 2.0998336106489184, + "grad_norm": 10.400750160217285, + "learning_rate": 5e-06, + "loss": 0.8992, + "num_input_tokens_seen": 39566940, + "step": 631 + }, + { + "epoch": 2.0998336106489184, + "loss": 0.94353187084198, + "loss_ce": 0.001393245765939355, + "loss_iou": 0.349609375, + "loss_num": 0.048095703125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 39566940, + "step": 631 + }, + { + "epoch": 2.103161397670549, + "grad_norm": 9.461636543273926, + "learning_rate": 5e-06, + "loss": 0.6036, + "num_input_tokens_seen": 39629024, + "step": 632 + }, + { + "epoch": 2.103161397670549, + "loss": 0.33993566036224365, + "loss_ce": 0.006317485123872757, + "loss_iou": 0.06982421875, + "loss_num": 0.038818359375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 39629024, + "step": 632 + }, + { + "epoch": 2.1064891846921796, + "grad_norm": 12.25512981414795, + "learning_rate": 5e-06, + "loss": 0.6903, + "num_input_tokens_seen": 39691612, + "step": 633 + }, + { + "epoch": 2.1064891846921796, + "loss": 0.667500376701355, + "loss_ce": 1.9936600438086316e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.041015625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 39691612, + "step": 633 + }, + { + "epoch": 2.10981697171381, + "grad_norm": 12.845260620117188, + "learning_rate": 5e-06, + "loss": 1.0146, + "num_input_tokens_seen": 39753308, + "step": 634 + }, + { + "epoch": 2.10981697171381, + "loss": 0.9783458113670349, + "loss_ce": 0.0008067445596680045, + "loss_iou": 0.271484375, + "loss_num": 0.08642578125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 39753308, + "step": 634 + }, + { + "epoch": 2.113144758735441, + "grad_norm": 12.062701225280762, + "learning_rate": 5e-06, + "loss": 0.9782, + "num_input_tokens_seen": 39815888, + "step": 635 + }, + { + "epoch": 2.113144758735441, + "loss": 1.166348934173584, + "loss_ce": 0.0003332831838633865, + "loss_iou": 0.291015625, + "loss_num": 0.11669921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 39815888, + "step": 635 + }, + { + "epoch": 2.1164725457570714, + "grad_norm": 13.48031234741211, + "learning_rate": 5e-06, + "loss": 0.9052, + "num_input_tokens_seen": 39877392, + "step": 636 + }, + { + "epoch": 2.1164725457570714, + "loss": 0.8594918251037598, + "loss_ce": 0.0008491812041029334, + "loss_iou": 0.21484375, + "loss_num": 0.0859375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 39877392, + "step": 636 + }, + { + "epoch": 2.119800332778702, + "grad_norm": 11.958724975585938, + "learning_rate": 5e-06, + "loss": 0.8913, + "num_input_tokens_seen": 39940516, + "step": 637 + }, + { + "epoch": 2.119800332778702, + "loss": 0.792966902256012, + "loss_ce": 0.009763746522367, + "loss_iou": 0.2392578125, + "loss_num": 0.06103515625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 39940516, + "step": 637 + }, + { + "epoch": 2.1231281198003327, + "grad_norm": 15.40645694732666, + "learning_rate": 5e-06, + "loss": 0.718, + "num_input_tokens_seen": 40003388, + "step": 638 + }, + { + "epoch": 2.1231281198003327, + "loss": 0.7531487941741943, + "loss_ce": 0.00021912308875471354, + "loss_iou": 0.28125, + "loss_num": 0.0380859375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 40003388, + "step": 638 + }, + { + "epoch": 2.1264559068219633, + "grad_norm": 32.53299331665039, + "learning_rate": 5e-06, + "loss": 1.1154, + "num_input_tokens_seen": 40066328, + "step": 639 + }, + { + "epoch": 2.1264559068219633, + "loss": 1.099750280380249, + "loss_ce": 0.0001409617834724486, + "loss_iou": 0.396484375, + "loss_num": 0.061279296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 40066328, + "step": 639 + }, + { + "epoch": 2.129783693843594, + "grad_norm": 33.41072082519531, + "learning_rate": 5e-06, + "loss": 0.8616, + "num_input_tokens_seen": 40127316, + "step": 640 + }, + { + "epoch": 2.129783693843594, + "loss": 0.9770931601524353, + "loss_ce": 0.0015072214882820845, + "loss_iou": 0.28125, + "loss_num": 0.0830078125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 40127316, + "step": 640 + }, + { + "epoch": 2.1331114808652245, + "grad_norm": 13.928618431091309, + "learning_rate": 5e-06, + "loss": 1.0466, + "num_input_tokens_seen": 40190528, + "step": 641 + }, + { + "epoch": 2.1331114808652245, + "loss": 1.031181812286377, + "loss_ce": 0.0015187868848443031, + "loss_iou": 0.341796875, + "loss_num": 0.0693359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 40190528, + "step": 641 + }, + { + "epoch": 2.136439267886855, + "grad_norm": 19.280794143676758, + "learning_rate": 5e-06, + "loss": 0.9631, + "num_input_tokens_seen": 40252724, + "step": 642 + }, + { + "epoch": 2.136439267886855, + "loss": 0.7468453645706177, + "loss_ce": 0.0005074806977063417, + "loss_iou": 0.26171875, + "loss_num": 0.04443359375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 40252724, + "step": 642 + }, + { + "epoch": 2.1397670549084857, + "grad_norm": 9.668257713317871, + "learning_rate": 5e-06, + "loss": 0.8991, + "num_input_tokens_seen": 40315860, + "step": 643 + }, + { + "epoch": 2.1397670549084857, + "loss": 0.768240213394165, + "loss_ce": 0.0001738157297950238, + "loss_iou": 0.30859375, + "loss_num": 0.0302734375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 40315860, + "step": 643 + }, + { + "epoch": 2.1430948419301163, + "grad_norm": 16.013214111328125, + "learning_rate": 5e-06, + "loss": 0.97, + "num_input_tokens_seen": 40379176, + "step": 644 + }, + { + "epoch": 2.1430948419301163, + "loss": 0.7990410327911377, + "loss_ce": 0.001677798223681748, + "loss_iou": 0.2353515625, + "loss_num": 0.0654296875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 40379176, + "step": 644 + }, + { + "epoch": 2.146422628951747, + "grad_norm": 8.998737335205078, + "learning_rate": 5e-06, + "loss": 0.8889, + "num_input_tokens_seen": 40440688, + "step": 645 + }, + { + "epoch": 2.146422628951747, + "loss": 1.1174098253250122, + "loss_ce": 0.00022229723981581628, + "loss_iou": 0.2490234375, + "loss_num": 0.1240234375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 40440688, + "step": 645 + }, + { + "epoch": 2.1497504159733776, + "grad_norm": 12.142661094665527, + "learning_rate": 5e-06, + "loss": 0.7721, + "num_input_tokens_seen": 40502032, + "step": 646 + }, + { + "epoch": 2.1497504159733776, + "loss": 0.9419033527374268, + "loss_ce": 8.781585165706929e-06, + "loss_iou": 0.255859375, + "loss_num": 0.08544921875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 40502032, + "step": 646 + }, + { + "epoch": 2.153078202995008, + "grad_norm": 12.413509368896484, + "learning_rate": 5e-06, + "loss": 0.6306, + "num_input_tokens_seen": 40563216, + "step": 647 + }, + { + "epoch": 2.153078202995008, + "loss": 0.6236965656280518, + "loss_ce": 0.0004055765166413039, + "loss_iou": 0.15625, + "loss_num": 0.062255859375, + "loss_xval": 0.625, + "num_input_tokens_seen": 40563216, + "step": 647 + }, + { + "epoch": 2.156405990016639, + "grad_norm": 12.308266639709473, + "learning_rate": 5e-06, + "loss": 1.0149, + "num_input_tokens_seen": 40626760, + "step": 648 + }, + { + "epoch": 2.156405990016639, + "loss": 1.0296001434326172, + "loss_ce": 0.0006693830946460366, + "loss_iou": 0.328125, + "loss_num": 0.07421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 40626760, + "step": 648 + }, + { + "epoch": 2.1597337770382694, + "grad_norm": 8.219746589660645, + "learning_rate": 5e-06, + "loss": 1.0916, + "num_input_tokens_seen": 40689820, + "step": 649 + }, + { + "epoch": 2.1597337770382694, + "loss": 1.043705940246582, + "loss_ce": 0.0009814061922952533, + "loss_iou": 0.333984375, + "loss_num": 0.07470703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 40689820, + "step": 649 + }, + { + "epoch": 2.1630615640599, + "grad_norm": 16.483530044555664, + "learning_rate": 5e-06, + "loss": 1.3246, + "num_input_tokens_seen": 40753476, + "step": 650 + }, + { + "epoch": 2.1630615640599, + "loss": 1.3722883462905884, + "loss_ce": 0.0011946188751608133, + "loss_iou": 0.466796875, + "loss_num": 0.08740234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 40753476, + "step": 650 + }, + { + "epoch": 2.1663893510815306, + "grad_norm": 15.433051109313965, + "learning_rate": 5e-06, + "loss": 0.8102, + "num_input_tokens_seen": 40816652, + "step": 651 + }, + { + "epoch": 2.1663893510815306, + "loss": 0.710527241230011, + "loss_ce": 0.0022752864751964808, + "loss_iou": 0.251953125, + "loss_num": 0.040771484375, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 40816652, + "step": 651 + }, + { + "epoch": 2.1697171381031612, + "grad_norm": 6.271609306335449, + "learning_rate": 5e-06, + "loss": 0.4691, + "num_input_tokens_seen": 40877216, + "step": 652 + }, + { + "epoch": 2.1697171381031612, + "loss": 0.44094425439834595, + "loss_ce": 2.6259316655341536e-05, + "loss_iou": 0.07275390625, + "loss_num": 0.05908203125, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 40877216, + "step": 652 + }, + { + "epoch": 2.173044925124792, + "grad_norm": 13.27111530303955, + "learning_rate": 5e-06, + "loss": 0.815, + "num_input_tokens_seen": 40938780, + "step": 653 + }, + { + "epoch": 2.173044925124792, + "loss": 0.9975978136062622, + "loss_ce": 0.000771641731262207, + "loss_iou": 0.283203125, + "loss_num": 0.0859375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 40938780, + "step": 653 + }, + { + "epoch": 2.1763727121464225, + "grad_norm": 13.250811576843262, + "learning_rate": 5e-06, + "loss": 0.8086, + "num_input_tokens_seen": 41002692, + "step": 654 + }, + { + "epoch": 2.1763727121464225, + "loss": 0.5700994729995728, + "loss_ce": 0.00027528181090019643, + "loss_iou": 0.15234375, + "loss_num": 0.052978515625, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 41002692, + "step": 654 + }, + { + "epoch": 2.179700499168053, + "grad_norm": 43.69986343383789, + "learning_rate": 5e-06, + "loss": 1.2292, + "num_input_tokens_seen": 41065588, + "step": 655 + }, + { + "epoch": 2.179700499168053, + "loss": 1.126779317855835, + "loss_ce": 0.0010468140244483948, + "loss_iou": 0.38671875, + "loss_num": 0.07080078125, + "loss_xval": 1.125, + "num_input_tokens_seen": 41065588, + "step": 655 + }, + { + "epoch": 2.1830282861896837, + "grad_norm": 23.3741455078125, + "learning_rate": 5e-06, + "loss": 0.7644, + "num_input_tokens_seen": 41127908, + "step": 656 + }, + { + "epoch": 2.1830282861896837, + "loss": 0.8155651092529297, + "loss_ce": 0.0005016563227400184, + "loss_iou": 0.2373046875, + "loss_num": 0.06787109375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 41127908, + "step": 656 + }, + { + "epoch": 2.1863560732113143, + "grad_norm": 16.306617736816406, + "learning_rate": 5e-06, + "loss": 1.0647, + "num_input_tokens_seen": 41190336, + "step": 657 + }, + { + "epoch": 2.1863560732113143, + "loss": 1.1568994522094727, + "loss_ce": 0.00016120978398248553, + "loss_iou": 0.3828125, + "loss_num": 0.07763671875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 41190336, + "step": 657 + }, + { + "epoch": 2.189683860232945, + "grad_norm": 11.455361366271973, + "learning_rate": 5e-06, + "loss": 0.7928, + "num_input_tokens_seen": 41252080, + "step": 658 + }, + { + "epoch": 2.189683860232945, + "loss": 0.7935343980789185, + "loss_ce": 0.0005656481371261179, + "loss_iou": 0.220703125, + "loss_num": 0.0703125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 41252080, + "step": 658 + }, + { + "epoch": 2.1930116472545755, + "grad_norm": 14.715596199035645, + "learning_rate": 5e-06, + "loss": 0.9227, + "num_input_tokens_seen": 41314652, + "step": 659 + }, + { + "epoch": 2.1930116472545755, + "loss": 0.932908833026886, + "loss_ce": 4.7503406676696613e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0654296875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 41314652, + "step": 659 + }, + { + "epoch": 2.196339434276206, + "grad_norm": 31.348485946655273, + "learning_rate": 5e-06, + "loss": 0.9761, + "num_input_tokens_seen": 41377348, + "step": 660 + }, + { + "epoch": 2.196339434276206, + "loss": 0.9302453398704529, + "loss_ce": 6.955982826184481e-05, + "loss_iou": 0.314453125, + "loss_num": 0.060302734375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 41377348, + "step": 660 + }, + { + "epoch": 2.1996672212978368, + "grad_norm": 35.344242095947266, + "learning_rate": 5e-06, + "loss": 1.0837, + "num_input_tokens_seen": 41440992, + "step": 661 + }, + { + "epoch": 2.1996672212978368, + "loss": 0.8363819122314453, + "loss_ce": 0.00020026677520945668, + "loss_iou": 0.283203125, + "loss_num": 0.0537109375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 41440992, + "step": 661 + }, + { + "epoch": 2.2029950083194674, + "grad_norm": 26.1990909576416, + "learning_rate": 5e-06, + "loss": 0.8925, + "num_input_tokens_seen": 41504132, + "step": 662 + }, + { + "epoch": 2.2029950083194674, + "loss": 0.7424072027206421, + "loss_ce": 0.00046388505143113434, + "loss_iou": 0.244140625, + "loss_num": 0.05078125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 41504132, + "step": 662 + }, + { + "epoch": 2.206322795341098, + "grad_norm": 9.237807273864746, + "learning_rate": 5e-06, + "loss": 0.6939, + "num_input_tokens_seen": 41565620, + "step": 663 + }, + { + "epoch": 2.206322795341098, + "loss": 0.7877798080444336, + "loss_ce": 6.009737990098074e-05, + "loss_iou": 0.205078125, + "loss_num": 0.07568359375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 41565620, + "step": 663 + }, + { + "epoch": 2.2096505823627286, + "grad_norm": 15.479573249816895, + "learning_rate": 5e-06, + "loss": 1.0272, + "num_input_tokens_seen": 41629236, + "step": 664 + }, + { + "epoch": 2.2096505823627286, + "loss": 1.0712847709655762, + "loss_ce": 0.00048389771836809814, + "loss_iou": 0.33203125, + "loss_num": 0.0810546875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 41629236, + "step": 664 + }, + { + "epoch": 2.212978369384359, + "grad_norm": 32.5822868347168, + "learning_rate": 5e-06, + "loss": 0.9038, + "num_input_tokens_seen": 41692904, + "step": 665 + }, + { + "epoch": 2.212978369384359, + "loss": 0.9757400751113892, + "loss_ce": 0.0011306863743811846, + "loss_iou": 0.2890625, + "loss_num": 0.07958984375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 41692904, + "step": 665 + }, + { + "epoch": 2.21630615640599, + "grad_norm": 15.892915725708008, + "learning_rate": 5e-06, + "loss": 0.9478, + "num_input_tokens_seen": 41755028, + "step": 666 + }, + { + "epoch": 2.21630615640599, + "loss": 1.2628767490386963, + "loss_ce": 0.0004256761458236724, + "loss_iou": 0.42578125, + "loss_num": 0.08251953125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 41755028, + "step": 666 + }, + { + "epoch": 2.2196339434276204, + "grad_norm": 32.91965103149414, + "learning_rate": 5e-06, + "loss": 1.0484, + "num_input_tokens_seen": 41819808, + "step": 667 + }, + { + "epoch": 2.2196339434276204, + "loss": 1.0486059188842773, + "loss_ce": 0.001242599100805819, + "loss_iou": 0.34765625, + "loss_num": 0.07080078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 41819808, + "step": 667 + }, + { + "epoch": 2.222961730449251, + "grad_norm": 6.644551753997803, + "learning_rate": 5e-06, + "loss": 0.8515, + "num_input_tokens_seen": 41883088, + "step": 668 + }, + { + "epoch": 2.222961730449251, + "loss": 0.8012328147888184, + "loss_ce": 0.00032947398722171783, + "loss_iou": 0.205078125, + "loss_num": 0.078125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 41883088, + "step": 668 + }, + { + "epoch": 2.2262895174708817, + "grad_norm": 19.913354873657227, + "learning_rate": 5e-06, + "loss": 0.9162, + "num_input_tokens_seen": 41946620, + "step": 669 + }, + { + "epoch": 2.2262895174708817, + "loss": 0.6975362300872803, + "loss_ce": 0.0007588434964418411, + "loss_iou": 0.236328125, + "loss_num": 0.04443359375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 41946620, + "step": 669 + }, + { + "epoch": 2.2296173044925123, + "grad_norm": 25.868154525756836, + "learning_rate": 5e-06, + "loss": 0.8179, + "num_input_tokens_seen": 42009140, + "step": 670 + }, + { + "epoch": 2.2296173044925123, + "loss": 0.7486177086830139, + "loss_ce": 8.254170825239271e-05, + "loss_iou": 0.275390625, + "loss_num": 0.03955078125, + "loss_xval": 0.75, + "num_input_tokens_seen": 42009140, + "step": 670 + }, + { + "epoch": 2.232945091514143, + "grad_norm": 16.261491775512695, + "learning_rate": 5e-06, + "loss": 0.8723, + "num_input_tokens_seen": 42069640, + "step": 671 + }, + { + "epoch": 2.232945091514143, + "loss": 1.1612319946289062, + "loss_ce": 0.0005875469068996608, + "loss_iou": 0.361328125, + "loss_num": 0.087890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 42069640, + "step": 671 + }, + { + "epoch": 2.2362728785357735, + "grad_norm": 9.516688346862793, + "learning_rate": 5e-06, + "loss": 0.8249, + "num_input_tokens_seen": 42132272, + "step": 672 + }, + { + "epoch": 2.2362728785357735, + "loss": 0.815399706363678, + "loss_ce": 0.0001836877636378631, + "loss_iou": 0.3046875, + "loss_num": 0.040771484375, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 42132272, + "step": 672 + }, + { + "epoch": 2.239600665557404, + "grad_norm": 12.779966354370117, + "learning_rate": 5e-06, + "loss": 0.7785, + "num_input_tokens_seen": 42194892, + "step": 673 + }, + { + "epoch": 2.239600665557404, + "loss": 0.6607878804206848, + "loss_ce": 0.00014333476428873837, + "loss_iou": 0.15625, + "loss_num": 0.0693359375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 42194892, + "step": 673 + }, + { + "epoch": 2.2429284525790347, + "grad_norm": 10.946259498596191, + "learning_rate": 5e-06, + "loss": 0.7948, + "num_input_tokens_seen": 42256156, + "step": 674 + }, + { + "epoch": 2.2429284525790347, + "loss": 0.764301061630249, + "loss_ce": 0.0003850538341794163, + "loss_iou": 0.232421875, + "loss_num": 0.06005859375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 42256156, + "step": 674 + }, + { + "epoch": 2.2462562396006653, + "grad_norm": 9.564322471618652, + "learning_rate": 5e-06, + "loss": 0.6524, + "num_input_tokens_seen": 42318292, + "step": 675 + }, + { + "epoch": 2.2462562396006653, + "loss": 0.6682754158973694, + "loss_ce": 6.24852254986763e-05, + "loss_iou": 0.234375, + "loss_num": 0.0400390625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 42318292, + "step": 675 + }, + { + "epoch": 2.249584026622296, + "grad_norm": 13.924878120422363, + "learning_rate": 5e-06, + "loss": 0.8124, + "num_input_tokens_seen": 42377848, + "step": 676 + }, + { + "epoch": 2.249584026622296, + "loss": 0.6967849731445312, + "loss_ce": 0.0017471231985837221, + "loss_iou": 0.212890625, + "loss_num": 0.0537109375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 42377848, + "step": 676 + }, + { + "epoch": 2.2529118136439266, + "grad_norm": 16.81814956665039, + "learning_rate": 5e-06, + "loss": 0.902, + "num_input_tokens_seen": 42439512, + "step": 677 + }, + { + "epoch": 2.2529118136439266, + "loss": 0.8860781788825989, + "loss_ce": 0.00033597912988625467, + "loss_iou": 0.279296875, + "loss_num": 0.06591796875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 42439512, + "step": 677 + }, + { + "epoch": 2.256239600665557, + "grad_norm": 11.292144775390625, + "learning_rate": 5e-06, + "loss": 1.0798, + "num_input_tokens_seen": 42502844, + "step": 678 + }, + { + "epoch": 2.256239600665557, + "loss": 1.1715800762176514, + "loss_ce": 0.00019335387332830578, + "loss_iou": 0.392578125, + "loss_num": 0.0771484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 42502844, + "step": 678 + }, + { + "epoch": 2.259567387687188, + "grad_norm": 7.578566551208496, + "learning_rate": 5e-06, + "loss": 1.047, + "num_input_tokens_seen": 42565372, + "step": 679 + }, + { + "epoch": 2.259567387687188, + "loss": 1.0372211933135986, + "loss_ce": 0.0010883715003728867, + "loss_iou": 0.365234375, + "loss_num": 0.0615234375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 42565372, + "step": 679 + }, + { + "epoch": 2.2628951747088184, + "grad_norm": 17.18895721435547, + "learning_rate": 5e-06, + "loss": 0.9563, + "num_input_tokens_seen": 42627684, + "step": 680 + }, + { + "epoch": 2.2628951747088184, + "loss": 1.0924670696258545, + "loss_ce": 0.0006701992242597044, + "loss_iou": 0.34375, + "loss_num": 0.08056640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 42627684, + "step": 680 + }, + { + "epoch": 2.266222961730449, + "grad_norm": 9.680091857910156, + "learning_rate": 5e-06, + "loss": 1.0404, + "num_input_tokens_seen": 42691788, + "step": 681 + }, + { + "epoch": 2.266222961730449, + "loss": 0.8576643466949463, + "loss_ce": 0.00048660385073162615, + "loss_iou": 0.310546875, + "loss_num": 0.046875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 42691788, + "step": 681 + }, + { + "epoch": 2.2695507487520796, + "grad_norm": 23.31703758239746, + "learning_rate": 5e-06, + "loss": 0.8207, + "num_input_tokens_seen": 42754224, + "step": 682 + }, + { + "epoch": 2.2695507487520796, + "loss": 0.6044555902481079, + "loss_ce": 8.549916674382985e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0478515625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 42754224, + "step": 682 + }, + { + "epoch": 2.2728785357737102, + "grad_norm": 10.370177268981934, + "learning_rate": 5e-06, + "loss": 0.9969, + "num_input_tokens_seen": 42817536, + "step": 683 + }, + { + "epoch": 2.2728785357737102, + "loss": 1.0521934032440186, + "loss_ce": 0.0008017598884180188, + "loss_iou": 0.310546875, + "loss_num": 0.08544921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 42817536, + "step": 683 + }, + { + "epoch": 2.276206322795341, + "grad_norm": 11.924843788146973, + "learning_rate": 5e-06, + "loss": 1.1041, + "num_input_tokens_seen": 42880584, + "step": 684 + }, + { + "epoch": 2.276206322795341, + "loss": 0.924105703830719, + "loss_ce": 0.0005217483267188072, + "loss_iou": 0.287109375, + "loss_num": 0.0703125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 42880584, + "step": 684 + }, + { + "epoch": 2.2795341098169715, + "grad_norm": 12.28469467163086, + "learning_rate": 5e-06, + "loss": 0.7665, + "num_input_tokens_seen": 42943528, + "step": 685 + }, + { + "epoch": 2.2795341098169715, + "loss": 0.8234906196594238, + "loss_ce": 0.00012639828491955996, + "loss_iou": 0.267578125, + "loss_num": 0.05712890625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 42943528, + "step": 685 + }, + { + "epoch": 2.2828618968386025, + "grad_norm": 38.641693115234375, + "learning_rate": 5e-06, + "loss": 0.9702, + "num_input_tokens_seen": 43007544, + "step": 686 + }, + { + "epoch": 2.2828618968386025, + "loss": 0.8214001655578613, + "loss_ce": 0.00011106484453193843, + "loss_iou": 0.306640625, + "loss_num": 0.0419921875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 43007544, + "step": 686 + }, + { + "epoch": 2.286189683860233, + "grad_norm": 22.426542282104492, + "learning_rate": 5e-06, + "loss": 0.6703, + "num_input_tokens_seen": 43069176, + "step": 687 + }, + { + "epoch": 2.286189683860233, + "loss": 0.6367377042770386, + "loss_ce": 0.0007513945456594229, + "loss_iou": 0.2216796875, + "loss_num": 0.03857421875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 43069176, + "step": 687 + }, + { + "epoch": 2.2895174708818637, + "grad_norm": 27.776212692260742, + "learning_rate": 5e-06, + "loss": 0.798, + "num_input_tokens_seen": 43133544, + "step": 688 + }, + { + "epoch": 2.2895174708818637, + "loss": 0.882362961769104, + "loss_ce": 0.00016075739404186606, + "loss_iou": 0.31640625, + "loss_num": 0.050048828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 43133544, + "step": 688 + }, + { + "epoch": 2.2928452579034944, + "grad_norm": 9.201436996459961, + "learning_rate": 5e-06, + "loss": 1.0724, + "num_input_tokens_seen": 43197028, + "step": 689 + }, + { + "epoch": 2.2928452579034944, + "loss": 1.2933428287506104, + "loss_ce": 0.001350653124973178, + "loss_iou": 0.453125, + "loss_num": 0.07666015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 43197028, + "step": 689 + }, + { + "epoch": 2.296173044925125, + "grad_norm": 11.568137168884277, + "learning_rate": 5e-06, + "loss": 0.728, + "num_input_tokens_seen": 43260444, + "step": 690 + }, + { + "epoch": 2.296173044925125, + "loss": 0.6836668252944946, + "loss_ce": 0.001293812645599246, + "loss_iou": 0.2470703125, + "loss_num": 0.03759765625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 43260444, + "step": 690 + }, + { + "epoch": 2.2995008319467556, + "grad_norm": 14.878931999206543, + "learning_rate": 5e-06, + "loss": 1.0874, + "num_input_tokens_seen": 43324336, + "step": 691 + }, + { + "epoch": 2.2995008319467556, + "loss": 1.3032047748565674, + "loss_ce": 0.0009586900705471635, + "loss_iou": 0.3984375, + "loss_num": 0.1005859375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 43324336, + "step": 691 + }, + { + "epoch": 2.302828618968386, + "grad_norm": 15.7121000289917, + "learning_rate": 5e-06, + "loss": 0.6999, + "num_input_tokens_seen": 43386868, + "step": 692 + }, + { + "epoch": 2.302828618968386, + "loss": 0.6661006212234497, + "loss_ce": 0.00032910649315454066, + "loss_iou": 0.220703125, + "loss_num": 0.044677734375, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 43386868, + "step": 692 + }, + { + "epoch": 2.306156405990017, + "grad_norm": 10.086871147155762, + "learning_rate": 5e-06, + "loss": 0.7448, + "num_input_tokens_seen": 43449464, + "step": 693 + }, + { + "epoch": 2.306156405990017, + "loss": 0.49396997690200806, + "loss_ce": 0.0011721242917701602, + "loss_iou": 0.1337890625, + "loss_num": 0.044921875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 43449464, + "step": 693 + }, + { + "epoch": 2.3094841930116474, + "grad_norm": 29.50054931640625, + "learning_rate": 5e-06, + "loss": 0.9566, + "num_input_tokens_seen": 43513148, + "step": 694 + }, + { + "epoch": 2.3094841930116474, + "loss": 1.0682692527770996, + "loss_ce": 0.0008863506955094635, + "loss_iou": 0.3671875, + "loss_num": 0.06640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 43513148, + "step": 694 + }, + { + "epoch": 2.312811980033278, + "grad_norm": 11.244392395019531, + "learning_rate": 5e-06, + "loss": 0.7949, + "num_input_tokens_seen": 43572220, + "step": 695 + }, + { + "epoch": 2.312811980033278, + "loss": 0.8442493677139282, + "loss_ce": 0.00025524571537971497, + "loss_iou": 0.275390625, + "loss_num": 0.05859375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 43572220, + "step": 695 + }, + { + "epoch": 2.3161397670549086, + "grad_norm": 34.46859359741211, + "learning_rate": 5e-06, + "loss": 0.8627, + "num_input_tokens_seen": 43635076, + "step": 696 + }, + { + "epoch": 2.3161397670549086, + "loss": 0.9726455211639404, + "loss_ce": 0.00023341998166870326, + "loss_iou": 0.31640625, + "loss_num": 0.06787109375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 43635076, + "step": 696 + }, + { + "epoch": 2.3194675540765393, + "grad_norm": 32.053955078125, + "learning_rate": 5e-06, + "loss": 1.0028, + "num_input_tokens_seen": 43698032, + "step": 697 + }, + { + "epoch": 2.3194675540765393, + "loss": 0.7314814925193787, + "loss_ce": 3.620800271164626e-05, + "loss_iou": 0.271484375, + "loss_num": 0.037353515625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 43698032, + "step": 697 + }, + { + "epoch": 2.32279534109817, + "grad_norm": 14.377496719360352, + "learning_rate": 5e-06, + "loss": 0.7126, + "num_input_tokens_seen": 43759968, + "step": 698 + }, + { + "epoch": 2.32279534109817, + "loss": 0.6350549459457397, + "loss_ce": 0.0002893089840654284, + "loss_iou": 0.21484375, + "loss_num": 0.040771484375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 43759968, + "step": 698 + }, + { + "epoch": 2.3261231281198005, + "grad_norm": 10.184778213500977, + "learning_rate": 5e-06, + "loss": 0.8103, + "num_input_tokens_seen": 43823276, + "step": 699 + }, + { + "epoch": 2.3261231281198005, + "loss": 0.973152220249176, + "loss_ce": 0.0004960413789376616, + "loss_iou": 0.306640625, + "loss_num": 0.07177734375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 43823276, + "step": 699 + }, + { + "epoch": 2.329450915141431, + "grad_norm": 10.185461044311523, + "learning_rate": 5e-06, + "loss": 0.9135, + "num_input_tokens_seen": 43886252, + "step": 700 + }, + { + "epoch": 2.329450915141431, + "loss": 0.7423095107078552, + "loss_ce": 0.0008544151787646115, + "loss_iou": 0.240234375, + "loss_num": 0.05224609375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 43886252, + "step": 700 + }, + { + "epoch": 2.3327787021630617, + "grad_norm": 14.283385276794434, + "learning_rate": 5e-06, + "loss": 0.8628, + "num_input_tokens_seen": 43950104, + "step": 701 + }, + { + "epoch": 2.3327787021630617, + "loss": 0.9078635573387146, + "loss_ce": 2.6576359232421964e-05, + "loss_iou": 0.283203125, + "loss_num": 0.06787109375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 43950104, + "step": 701 + }, + { + "epoch": 2.3361064891846923, + "grad_norm": 17.961149215698242, + "learning_rate": 5e-06, + "loss": 0.8288, + "num_input_tokens_seen": 44013476, + "step": 702 + }, + { + "epoch": 2.3361064891846923, + "loss": 0.6979770660400391, + "loss_ce": 0.0004673382209148258, + "loss_iou": 0.20703125, + "loss_num": 0.056640625, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 44013476, + "step": 702 + }, + { + "epoch": 2.339434276206323, + "grad_norm": 16.723167419433594, + "learning_rate": 5e-06, + "loss": 0.8697, + "num_input_tokens_seen": 44078112, + "step": 703 + }, + { + "epoch": 2.339434276206323, + "loss": 0.748289167881012, + "loss_ce": 0.000974722090177238, + "loss_iou": 0.283203125, + "loss_num": 0.036376953125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 44078112, + "step": 703 + }, + { + "epoch": 2.3427620632279536, + "grad_norm": 22.591569900512695, + "learning_rate": 5e-06, + "loss": 0.8053, + "num_input_tokens_seen": 44140928, + "step": 704 + }, + { + "epoch": 2.3427620632279536, + "loss": 0.524970293045044, + "loss_ce": 0.000800370064098388, + "loss_iou": 0.1708984375, + "loss_num": 0.036376953125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 44140928, + "step": 704 + }, + { + "epoch": 2.346089850249584, + "grad_norm": 49.59932327270508, + "learning_rate": 5e-06, + "loss": 1.0129, + "num_input_tokens_seen": 44203212, + "step": 705 + }, + { + "epoch": 2.346089850249584, + "loss": 0.6107718348503113, + "loss_ce": 5.408008291851729e-05, + "loss_iou": 0.185546875, + "loss_num": 0.048095703125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 44203212, + "step": 705 + }, + { + "epoch": 2.3494176372712148, + "grad_norm": 12.25649642944336, + "learning_rate": 5e-06, + "loss": 1.0834, + "num_input_tokens_seen": 44267888, + "step": 706 + }, + { + "epoch": 2.3494176372712148, + "loss": 1.069253921508789, + "loss_ce": 0.0007725384784862399, + "loss_iou": 0.36328125, + "loss_num": 0.068359375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 44267888, + "step": 706 + }, + { + "epoch": 2.3527454242928454, + "grad_norm": 8.59157943725586, + "learning_rate": 5e-06, + "loss": 0.9064, + "num_input_tokens_seen": 44331788, + "step": 707 + }, + { + "epoch": 2.3527454242928454, + "loss": 0.8050553798675537, + "loss_ce": 0.0013445127988234162, + "loss_iou": 0.287109375, + "loss_num": 0.0458984375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 44331788, + "step": 707 + }, + { + "epoch": 2.356073211314476, + "grad_norm": 13.44419002532959, + "learning_rate": 5e-06, + "loss": 0.8174, + "num_input_tokens_seen": 44394832, + "step": 708 + }, + { + "epoch": 2.356073211314476, + "loss": 0.922235906124115, + "loss_ce": 0.0008491869666613638, + "loss_iou": 0.341796875, + "loss_num": 0.0478515625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 44394832, + "step": 708 + }, + { + "epoch": 2.3594009983361066, + "grad_norm": 9.479907989501953, + "learning_rate": 5e-06, + "loss": 1.0606, + "num_input_tokens_seen": 44459416, + "step": 709 + }, + { + "epoch": 2.3594009983361066, + "loss": 0.9726129770278931, + "loss_ce": 0.0014215761329978704, + "loss_iou": 0.361328125, + "loss_num": 0.05029296875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 44459416, + "step": 709 + }, + { + "epoch": 2.3627287853577372, + "grad_norm": 7.282090187072754, + "learning_rate": 5e-06, + "loss": 0.5709, + "num_input_tokens_seen": 44519432, + "step": 710 + }, + { + "epoch": 2.3627287853577372, + "loss": 0.7231759428977966, + "loss_ce": 0.0001534855255158618, + "loss_iou": 0.1806640625, + "loss_num": 0.072265625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 44519432, + "step": 710 + }, + { + "epoch": 2.366056572379368, + "grad_norm": 10.938211441040039, + "learning_rate": 5e-06, + "loss": 0.9808, + "num_input_tokens_seen": 44583072, + "step": 711 + }, + { + "epoch": 2.366056572379368, + "loss": 0.8792444467544556, + "loss_ce": 0.0010706019820645452, + "loss_iou": 0.30078125, + "loss_num": 0.0556640625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 44583072, + "step": 711 + }, + { + "epoch": 2.3693843594009985, + "grad_norm": 15.567341804504395, + "learning_rate": 5e-06, + "loss": 0.9657, + "num_input_tokens_seen": 44646108, + "step": 712 + }, + { + "epoch": 2.3693843594009985, + "loss": 1.0545854568481445, + "loss_ce": 0.0018511018715798855, + "loss_iou": 0.390625, + "loss_num": 0.05419921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 44646108, + "step": 712 + }, + { + "epoch": 2.372712146422629, + "grad_norm": 10.228503227233887, + "learning_rate": 5e-06, + "loss": 0.9219, + "num_input_tokens_seen": 44708204, + "step": 713 + }, + { + "epoch": 2.372712146422629, + "loss": 1.1827502250671387, + "loss_ce": 0.0006212661974132061, + "loss_iou": 0.40625, + "loss_num": 0.07421875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 44708204, + "step": 713 + }, + { + "epoch": 2.3760399334442597, + "grad_norm": 12.394549369812012, + "learning_rate": 5e-06, + "loss": 0.6104, + "num_input_tokens_seen": 44769100, + "step": 714 + }, + { + "epoch": 2.3760399334442597, + "loss": 0.5975984334945679, + "loss_ce": 0.0037263473495841026, + "loss_iou": 0.162109375, + "loss_num": 0.053955078125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 44769100, + "step": 714 + }, + { + "epoch": 2.3793677204658903, + "grad_norm": 13.236189842224121, + "learning_rate": 5e-06, + "loss": 0.9356, + "num_input_tokens_seen": 44832944, + "step": 715 + }, + { + "epoch": 2.3793677204658903, + "loss": 0.889470100402832, + "loss_ce": 0.003239629790186882, + "loss_iou": 0.3125, + "loss_num": 0.052490234375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 44832944, + "step": 715 + }, + { + "epoch": 2.382695507487521, + "grad_norm": 15.01264762878418, + "learning_rate": 5e-06, + "loss": 0.937, + "num_input_tokens_seen": 44894316, + "step": 716 + }, + { + "epoch": 2.382695507487521, + "loss": 0.9371458292007446, + "loss_ce": 0.0013547912240028381, + "loss_iou": 0.2265625, + "loss_num": 0.0966796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 44894316, + "step": 716 + }, + { + "epoch": 2.3860232945091515, + "grad_norm": 40.445865631103516, + "learning_rate": 5e-06, + "loss": 1.1198, + "num_input_tokens_seen": 44958448, + "step": 717 + }, + { + "epoch": 2.3860232945091515, + "loss": 1.0740020275115967, + "loss_ce": 0.00027151533868163824, + "loss_iou": 0.38671875, + "loss_num": 0.0595703125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 44958448, + "step": 717 + }, + { + "epoch": 2.389351081530782, + "grad_norm": 21.32187271118164, + "learning_rate": 5e-06, + "loss": 0.8088, + "num_input_tokens_seen": 45020540, + "step": 718 + }, + { + "epoch": 2.389351081530782, + "loss": 0.8202319145202637, + "loss_ce": 0.00028564067906700075, + "loss_iou": 0.267578125, + "loss_num": 0.056884765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 45020540, + "step": 718 + }, + { + "epoch": 2.3926788685524127, + "grad_norm": 11.086956024169922, + "learning_rate": 5e-06, + "loss": 0.7241, + "num_input_tokens_seen": 45081188, + "step": 719 + }, + { + "epoch": 2.3926788685524127, + "loss": 0.6227037906646729, + "loss_ce": 0.00014521228149533272, + "loss_iou": 0.171875, + "loss_num": 0.0556640625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 45081188, + "step": 719 + }, + { + "epoch": 2.3960066555740434, + "grad_norm": 15.942041397094727, + "learning_rate": 5e-06, + "loss": 0.86, + "num_input_tokens_seen": 45142776, + "step": 720 + }, + { + "epoch": 2.3960066555740434, + "loss": 0.6935033798217773, + "loss_ce": 0.0011205670889467, + "loss_iou": 0.255859375, + "loss_num": 0.035888671875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 45142776, + "step": 720 + }, + { + "epoch": 2.399334442595674, + "grad_norm": 13.693964004516602, + "learning_rate": 5e-06, + "loss": 1.0913, + "num_input_tokens_seen": 45205824, + "step": 721 + }, + { + "epoch": 2.399334442595674, + "loss": 1.0079128742218018, + "loss_ce": 0.0008326807874254882, + "loss_iou": 0.294921875, + "loss_num": 0.08349609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 45205824, + "step": 721 + }, + { + "epoch": 2.4026622296173046, + "grad_norm": 15.026814460754395, + "learning_rate": 5e-06, + "loss": 0.7762, + "num_input_tokens_seen": 45268764, + "step": 722 + }, + { + "epoch": 2.4026622296173046, + "loss": 0.6626920700073242, + "loss_ce": 9.44572821026668e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0556640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 45268764, + "step": 722 + }, + { + "epoch": 2.405990016638935, + "grad_norm": 11.194849014282227, + "learning_rate": 5e-06, + "loss": 0.6871, + "num_input_tokens_seen": 45331492, + "step": 723 + }, + { + "epoch": 2.405990016638935, + "loss": 0.48289889097213745, + "loss_ce": 0.000721134536433965, + "loss_iou": 0.125, + "loss_num": 0.046630859375, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 45331492, + "step": 723 + }, + { + "epoch": 2.409317803660566, + "grad_norm": 15.214365005493164, + "learning_rate": 5e-06, + "loss": 0.6964, + "num_input_tokens_seen": 45394532, + "step": 724 + }, + { + "epoch": 2.409317803660566, + "loss": 0.8721684217453003, + "loss_ce": 0.000769482518080622, + "loss_iou": 0.30859375, + "loss_num": 0.050537109375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 45394532, + "step": 724 + }, + { + "epoch": 2.4126455906821964, + "grad_norm": 18.638185501098633, + "learning_rate": 5e-06, + "loss": 0.757, + "num_input_tokens_seen": 45457232, + "step": 725 + }, + { + "epoch": 2.4126455906821964, + "loss": 0.6739429235458374, + "loss_ce": 0.000358955207047984, + "loss_iou": 0.244140625, + "loss_num": 0.037109375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 45457232, + "step": 725 + }, + { + "epoch": 2.415973377703827, + "grad_norm": 18.782562255859375, + "learning_rate": 5e-06, + "loss": 0.8527, + "num_input_tokens_seen": 45519960, + "step": 726 + }, + { + "epoch": 2.415973377703827, + "loss": 0.7281821370124817, + "loss_ce": 0.00113136216532439, + "loss_iou": 0.28125, + "loss_num": 0.033203125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 45519960, + "step": 726 + }, + { + "epoch": 2.4193011647254576, + "grad_norm": 35.47547149658203, + "learning_rate": 5e-06, + "loss": 1.1733, + "num_input_tokens_seen": 45583772, + "step": 727 + }, + { + "epoch": 2.4193011647254576, + "loss": 1.2746448516845703, + "loss_ce": 0.00023083810810931027, + "loss_iou": 0.47265625, + "loss_num": 0.06591796875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 45583772, + "step": 727 + }, + { + "epoch": 2.4226289517470883, + "grad_norm": 30.0634708404541, + "learning_rate": 5e-06, + "loss": 0.9175, + "num_input_tokens_seen": 45646088, + "step": 728 + }, + { + "epoch": 2.4226289517470883, + "loss": 0.9010690450668335, + "loss_ce": 0.016364434733986855, + "loss_iou": 0.302734375, + "loss_num": 0.055908203125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 45646088, + "step": 728 + }, + { + "epoch": 2.425956738768719, + "grad_norm": 15.3126859664917, + "learning_rate": 5e-06, + "loss": 0.6904, + "num_input_tokens_seen": 45708808, + "step": 729 + }, + { + "epoch": 2.425956738768719, + "loss": 0.5067150592803955, + "loss_ce": 0.0014660221058875322, + "loss_iou": 0.109375, + "loss_num": 0.057373046875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 45708808, + "step": 729 + }, + { + "epoch": 2.4292845257903495, + "grad_norm": 19.397184371948242, + "learning_rate": 5e-06, + "loss": 0.8657, + "num_input_tokens_seen": 45770068, + "step": 730 + }, + { + "epoch": 2.4292845257903495, + "loss": 0.9862344264984131, + "loss_ce": 0.001096484367735684, + "loss_iou": 0.330078125, + "loss_num": 0.0654296875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 45770068, + "step": 730 + }, + { + "epoch": 2.43261231281198, + "grad_norm": 7.857912063598633, + "learning_rate": 5e-06, + "loss": 0.6581, + "num_input_tokens_seen": 45832556, + "step": 731 + }, + { + "epoch": 2.43261231281198, + "loss": 0.5868287682533264, + "loss_ce": 0.002844445873051882, + "loss_iou": 0.173828125, + "loss_num": 0.047119140625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 45832556, + "step": 731 + }, + { + "epoch": 2.4359400998336107, + "grad_norm": 28.30218505859375, + "learning_rate": 5e-06, + "loss": 0.9152, + "num_input_tokens_seen": 45894836, + "step": 732 + }, + { + "epoch": 2.4359400998336107, + "loss": 1.060274362564087, + "loss_ce": 0.0003377725079189986, + "loss_iou": 0.3671875, + "loss_num": 0.06494140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 45894836, + "step": 732 + }, + { + "epoch": 2.4392678868552413, + "grad_norm": 17.082120895385742, + "learning_rate": 5e-06, + "loss": 0.698, + "num_input_tokens_seen": 45957656, + "step": 733 + }, + { + "epoch": 2.4392678868552413, + "loss": 0.6111936569213867, + "loss_ce": 0.0005979241104796529, + "loss_iou": 0.1962890625, + "loss_num": 0.0439453125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 45957656, + "step": 733 + }, + { + "epoch": 2.442595673876872, + "grad_norm": 23.90694236755371, + "learning_rate": 5e-06, + "loss": 0.9885, + "num_input_tokens_seen": 46021108, + "step": 734 + }, + { + "epoch": 2.442595673876872, + "loss": 1.1393766403198242, + "loss_ce": 0.0013150431914255023, + "loss_iou": 0.322265625, + "loss_num": 0.0986328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 46021108, + "step": 734 + }, + { + "epoch": 2.4459234608985025, + "grad_norm": 11.997917175292969, + "learning_rate": 5e-06, + "loss": 0.8011, + "num_input_tokens_seen": 46083808, + "step": 735 + }, + { + "epoch": 2.4459234608985025, + "loss": 0.8153502345085144, + "loss_ce": 0.003277485491707921, + "loss_iou": 0.24609375, + "loss_num": 0.06396484375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 46083808, + "step": 735 + }, + { + "epoch": 2.449251247920133, + "grad_norm": 27.563457489013672, + "learning_rate": 5e-06, + "loss": 0.8633, + "num_input_tokens_seen": 46146540, + "step": 736 + }, + { + "epoch": 2.449251247920133, + "loss": 0.8773728609085083, + "loss_ce": 0.0001755441480781883, + "loss_iou": 0.287109375, + "loss_num": 0.060546875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 46146540, + "step": 736 + }, + { + "epoch": 2.4525790349417638, + "grad_norm": 14.77198600769043, + "learning_rate": 5e-06, + "loss": 0.9703, + "num_input_tokens_seen": 46209148, + "step": 737 + }, + { + "epoch": 2.4525790349417638, + "loss": 0.757889986038208, + "loss_ce": 0.0027630457188934088, + "loss_iou": 0.2412109375, + "loss_num": 0.0546875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 46209148, + "step": 737 + }, + { + "epoch": 2.4559068219633944, + "grad_norm": 11.89362907409668, + "learning_rate": 5e-06, + "loss": 0.7569, + "num_input_tokens_seen": 46272732, + "step": 738 + }, + { + "epoch": 2.4559068219633944, + "loss": 0.7210886478424072, + "loss_ce": 0.0008737589814700186, + "loss_iou": 0.24609375, + "loss_num": 0.04541015625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 46272732, + "step": 738 + }, + { + "epoch": 2.459234608985025, + "grad_norm": 4.648507118225098, + "learning_rate": 5e-06, + "loss": 0.619, + "num_input_tokens_seen": 46336108, + "step": 739 + }, + { + "epoch": 2.459234608985025, + "loss": 0.7744088768959045, + "loss_ce": 0.00014738636673428118, + "loss_iou": 0.25, + "loss_num": 0.0546875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 46336108, + "step": 739 + }, + { + "epoch": 2.4625623960066556, + "grad_norm": 21.0081844329834, + "learning_rate": 5e-06, + "loss": 0.7802, + "num_input_tokens_seen": 46397180, + "step": 740 + }, + { + "epoch": 2.4625623960066556, + "loss": 0.857671856880188, + "loss_ce": 0.0025693103671073914, + "loss_iou": 0.2734375, + "loss_num": 0.061767578125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 46397180, + "step": 740 + }, + { + "epoch": 2.465890183028286, + "grad_norm": 13.0715913772583, + "learning_rate": 5e-06, + "loss": 0.7849, + "num_input_tokens_seen": 46459372, + "step": 741 + }, + { + "epoch": 2.465890183028286, + "loss": 0.6565080881118774, + "loss_ce": 0.006361585110425949, + "loss_iou": 0.1689453125, + "loss_num": 0.0625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 46459372, + "step": 741 + }, + { + "epoch": 2.469217970049917, + "grad_norm": 9.451695442199707, + "learning_rate": 5e-06, + "loss": 0.991, + "num_input_tokens_seen": 46523256, + "step": 742 + }, + { + "epoch": 2.469217970049917, + "loss": 1.2594883441925049, + "loss_ce": 0.00021096415002830327, + "loss_iou": 0.44921875, + "loss_num": 0.072265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 46523256, + "step": 742 + }, + { + "epoch": 2.4725457570715474, + "grad_norm": 13.714978218078613, + "learning_rate": 5e-06, + "loss": 0.9132, + "num_input_tokens_seen": 46587516, + "step": 743 + }, + { + "epoch": 2.4725457570715474, + "loss": 1.0107247829437256, + "loss_ce": 0.0005928677855990827, + "loss_iou": 0.3359375, + "loss_num": 0.0673828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 46587516, + "step": 743 + }, + { + "epoch": 2.475873544093178, + "grad_norm": 12.65008544921875, + "learning_rate": 5e-06, + "loss": 1.0877, + "num_input_tokens_seen": 46650720, + "step": 744 + }, + { + "epoch": 2.475873544093178, + "loss": 1.1367709636688232, + "loss_ce": 0.0029819714836776257, + "loss_iou": 0.359375, + "loss_num": 0.0830078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 46650720, + "step": 744 + }, + { + "epoch": 2.4792013311148087, + "grad_norm": 36.57686996459961, + "learning_rate": 5e-06, + "loss": 0.8593, + "num_input_tokens_seen": 46715288, + "step": 745 + }, + { + "epoch": 2.4792013311148087, + "loss": 0.6860495209693909, + "loss_ce": 1.4360236491484102e-05, + "loss_iou": 0.23046875, + "loss_num": 0.045166015625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 46715288, + "step": 745 + }, + { + "epoch": 2.4825291181364393, + "grad_norm": 12.971878051757812, + "learning_rate": 5e-06, + "loss": 0.7946, + "num_input_tokens_seen": 46777768, + "step": 746 + }, + { + "epoch": 2.4825291181364393, + "loss": 0.78072589635849, + "loss_ce": 0.0009407766629010439, + "loss_iou": 0.212890625, + "loss_num": 0.07080078125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 46777768, + "step": 746 + }, + { + "epoch": 2.48585690515807, + "grad_norm": 11.606049537658691, + "learning_rate": 5e-06, + "loss": 0.8739, + "num_input_tokens_seen": 46839216, + "step": 747 + }, + { + "epoch": 2.48585690515807, + "loss": 0.9141720533370972, + "loss_ce": 0.0010860441252589226, + "loss_iou": 0.3203125, + "loss_num": 0.0546875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 46839216, + "step": 747 + }, + { + "epoch": 2.4891846921797005, + "grad_norm": 17.032249450683594, + "learning_rate": 5e-06, + "loss": 0.5929, + "num_input_tokens_seen": 46899036, + "step": 748 + }, + { + "epoch": 2.4891846921797005, + "loss": 0.5191299319267273, + "loss_ce": 8.698241435922682e-05, + "loss_iou": 0.126953125, + "loss_num": 0.052978515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 46899036, + "step": 748 + }, + { + "epoch": 2.492512479201331, + "grad_norm": 11.303759574890137, + "learning_rate": 5e-06, + "loss": 0.805, + "num_input_tokens_seen": 46961588, + "step": 749 + }, + { + "epoch": 2.492512479201331, + "loss": 0.8356009721755981, + "loss_ce": 0.000151735614053905, + "loss_iou": 0.2275390625, + "loss_num": 0.076171875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 46961588, + "step": 749 + }, + { + "epoch": 2.4958402662229617, + "grad_norm": 23.632003784179688, + "learning_rate": 5e-06, + "loss": 0.833, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "eval_seeclick_CIoU": 0.13491493463516235, + "eval_seeclick_GIoU": 0.15883130580186844, + "eval_seeclick_IoU": 0.24382416903972626, + "eval_seeclick_MAE_all": 0.19065534323453903, + "eval_seeclick_MAE_h": 0.05082565359771252, + "eval_seeclick_MAE_w": 0.13937117159366608, + "eval_seeclick_MAE_x_boxes": 0.30929259955883026, + "eval_seeclick_MAE_y_boxes": 0.13166731595993042, + "eval_seeclick_NUM_probability": 0.9999373257160187, + "eval_seeclick_inside_bbox": 0.35208334028720856, + "eval_seeclick_loss": 2.6928210258483887, + "eval_seeclick_loss_ce": 0.08299023285508156, + "eval_seeclick_loss_iou": 0.830810546875, + "eval_seeclick_loss_num": 0.1858978271484375, + "eval_seeclick_loss_xval": 2.59033203125, + "eval_seeclick_runtime": 62.8164, + "eval_seeclick_samples_per_second": 0.748, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "eval_icons_CIoU": 0.05114769656211138, + "eval_icons_GIoU": 0.15702077746391296, + "eval_icons_IoU": 0.2036093920469284, + "eval_icons_MAE_all": 0.12747248262166977, + "eval_icons_MAE_h": 0.052048404701054096, + "eval_icons_MAE_w": 0.1445429064333439, + "eval_icons_MAE_x_boxes": 0.12461870163679123, + "eval_icons_MAE_y_boxes": 0.046514895744621754, + "eval_icons_NUM_probability": 0.9999918937683105, + "eval_icons_inside_bbox": 0.3576388955116272, + "eval_icons_loss": 2.3343775272369385, + "eval_icons_loss_ce": 2.2014152136762277e-06, + "eval_icons_loss_iou": 0.830078125, + "eval_icons_loss_num": 0.1331043243408203, + "eval_icons_loss_xval": 2.32666015625, + "eval_icons_runtime": 65.6403, + "eval_icons_samples_per_second": 0.762, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "eval_screenspot_CIoU": 0.039610182866454124, + "eval_screenspot_GIoU": 0.10459545751412709, + "eval_screenspot_IoU": 0.1884987105925878, + "eval_screenspot_MAE_all": 0.20243261754512787, + "eval_screenspot_MAE_h": 0.0592109685142835, + "eval_screenspot_MAE_w": 0.18089259415864944, + "eval_screenspot_MAE_x_boxes": 0.250293031334877, + "eval_screenspot_MAE_y_boxes": 0.1454415942231814, + "eval_screenspot_NUM_probability": 0.9999621907869974, + "eval_screenspot_inside_bbox": 0.3387500047683716, + "eval_screenspot_loss": 2.80856990814209, + "eval_screenspot_loss_ce": 0.00027161008135105175, + "eval_screenspot_loss_iou": 0.9033203125, + "eval_screenspot_loss_num": 0.20716349283854166, + "eval_screenspot_loss_xval": 2.8421223958333335, + "eval_screenspot_runtime": 126.77, + "eval_screenspot_samples_per_second": 0.702, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "eval_compot_CIoU": -0.054212167859077454, + "eval_compot_GIoU": 0.057059221900999546, + "eval_compot_IoU": 0.11786043643951416, + "eval_compot_MAE_all": 0.2351270318031311, + "eval_compot_MAE_h": 0.06904683262109756, + "eval_compot_MAE_w": 0.2527950033545494, + "eval_compot_MAE_x_boxes": 0.19278910011053085, + "eval_compot_MAE_y_boxes": 0.18391906470060349, + "eval_compot_NUM_probability": 0.9999668002128601, + "eval_compot_inside_bbox": 0.2239583358168602, + "eval_compot_loss": 2.9689693450927734, + "eval_compot_loss_ce": 0.006361398845911026, + "eval_compot_loss_iou": 0.924072265625, + "eval_compot_loss_num": 0.2135009765625, + "eval_compot_loss_xval": 2.916015625, + "eval_compot_runtime": 72.4042, + "eval_compot_samples_per_second": 0.691, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "eval_custom_ui_MAE_all": 0.09127533435821533, + "eval_custom_ui_MAE_x": 0.09527997672557831, + "eval_custom_ui_MAE_y": 0.08727069199085236, + "eval_custom_ui_NUM_probability": 0.999992311000824, + "eval_custom_ui_loss": 0.4499455690383911, + "eval_custom_ui_loss_ce": 8.726405303605134e-05, + "eval_custom_ui_loss_num": 0.0854949951171875, + "eval_custom_ui_loss_xval": 0.4273681640625, + "eval_custom_ui_runtime": 56.9564, + "eval_custom_ui_samples_per_second": 0.878, + "eval_custom_ui_steps_per_second": 0.035, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4958402662229617, + "loss": 0.40762990713119507, + "loss_ce": 0.00015917516429908574, + "loss_iou": 0.0, + "loss_num": 0.08154296875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 47024696, + "step": 750 + }, + { + "epoch": 2.4991680532445923, + "grad_norm": 18.54668426513672, + "learning_rate": 5e-06, + "loss": 0.8896, + "num_input_tokens_seen": 47088064, + "step": 751 + }, + { + "epoch": 2.4991680532445923, + "loss": 0.5935692191123962, + "loss_ce": 0.0016502785729244351, + "loss_iou": 0.16796875, + "loss_num": 0.05126953125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 47088064, + "step": 751 + }, + { + "epoch": 2.502495840266223, + "grad_norm": 11.817997932434082, + "learning_rate": 5e-06, + "loss": 0.8837, + "num_input_tokens_seen": 47150452, + "step": 752 + }, + { + "epoch": 2.502495840266223, + "loss": 0.8448632955551147, + "loss_ce": 0.00044191486085765064, + "loss_iou": 0.30859375, + "loss_num": 0.045654296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 47150452, + "step": 752 + }, + { + "epoch": 2.5058236272878536, + "grad_norm": 23.670198440551758, + "learning_rate": 5e-06, + "loss": 0.7463, + "num_input_tokens_seen": 47212136, + "step": 753 + }, + { + "epoch": 2.5058236272878536, + "loss": 0.8123268485069275, + "loss_ce": 7.097484922269359e-05, + "loss_iou": 0.28515625, + "loss_num": 0.048583984375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 47212136, + "step": 753 + }, + { + "epoch": 2.509151414309484, + "grad_norm": 19.003517150878906, + "learning_rate": 5e-06, + "loss": 0.7155, + "num_input_tokens_seen": 47275172, + "step": 754 + }, + { + "epoch": 2.509151414309484, + "loss": 0.7601994276046753, + "loss_ce": 0.0021427858155220747, + "loss_iou": 0.255859375, + "loss_num": 0.04931640625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 47275172, + "step": 754 + }, + { + "epoch": 2.512479201331115, + "grad_norm": 42.69208908081055, + "learning_rate": 5e-06, + "loss": 0.8689, + "num_input_tokens_seen": 47336960, + "step": 755 + }, + { + "epoch": 2.512479201331115, + "loss": 1.1230835914611816, + "loss_ce": 0.0015015102690085769, + "loss_iou": 0.3828125, + "loss_num": 0.0712890625, + "loss_xval": 1.125, + "num_input_tokens_seen": 47336960, + "step": 755 + }, + { + "epoch": 2.5158069883527454, + "grad_norm": 18.97686004638672, + "learning_rate": 5e-06, + "loss": 0.9074, + "num_input_tokens_seen": 47400968, + "step": 756 + }, + { + "epoch": 2.5158069883527454, + "loss": 0.8372678756713867, + "loss_ce": 0.000353793177055195, + "loss_iou": 0.26171875, + "loss_num": 0.06298828125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 47400968, + "step": 756 + }, + { + "epoch": 2.519134775374376, + "grad_norm": 19.407001495361328, + "learning_rate": 5e-06, + "loss": 0.7905, + "num_input_tokens_seen": 47462844, + "step": 757 + }, + { + "epoch": 2.519134775374376, + "loss": 0.7666225433349609, + "loss_ce": 0.0009975419379770756, + "loss_iou": 0.171875, + "loss_num": 0.08447265625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 47462844, + "step": 757 + }, + { + "epoch": 2.5224625623960066, + "grad_norm": 21.340974807739258, + "learning_rate": 5e-06, + "loss": 0.8336, + "num_input_tokens_seen": 47524364, + "step": 758 + }, + { + "epoch": 2.5224625623960066, + "loss": 1.158284068107605, + "loss_ce": 0.0013016541488468647, + "loss_iou": 0.361328125, + "loss_num": 0.08740234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 47524364, + "step": 758 + }, + { + "epoch": 2.5257903494176372, + "grad_norm": 12.445579528808594, + "learning_rate": 5e-06, + "loss": 0.7022, + "num_input_tokens_seen": 47587348, + "step": 759 + }, + { + "epoch": 2.5257903494176372, + "loss": 0.6259897947311401, + "loss_ce": 0.0009897334966808558, + "loss_iou": 0.2109375, + "loss_num": 0.04052734375, + "loss_xval": 0.625, + "num_input_tokens_seen": 47587348, + "step": 759 + }, + { + "epoch": 2.529118136439268, + "grad_norm": 11.766027450561523, + "learning_rate": 5e-06, + "loss": 0.9098, + "num_input_tokens_seen": 47649600, + "step": 760 + }, + { + "epoch": 2.529118136439268, + "loss": 0.6773974895477295, + "loss_ce": 0.0006396822864189744, + "loss_iou": 0.234375, + "loss_num": 0.04150390625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 47649600, + "step": 760 + }, + { + "epoch": 2.5324459234608985, + "grad_norm": 12.828213691711426, + "learning_rate": 5e-06, + "loss": 0.7839, + "num_input_tokens_seen": 47712632, + "step": 761 + }, + { + "epoch": 2.5324459234608985, + "loss": 0.7082505226135254, + "loss_ce": 0.00012061389134032652, + "loss_iou": 0.236328125, + "loss_num": 0.046875, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 47712632, + "step": 761 + }, + { + "epoch": 2.535773710482529, + "grad_norm": 21.702556610107422, + "learning_rate": 5e-06, + "loss": 0.9561, + "num_input_tokens_seen": 47776100, + "step": 762 + }, + { + "epoch": 2.535773710482529, + "loss": 0.7422810792922974, + "loss_ce": 9.361249976791441e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.0537109375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 47776100, + "step": 762 + }, + { + "epoch": 2.5391014975041597, + "grad_norm": 15.470916748046875, + "learning_rate": 5e-06, + "loss": 0.9802, + "num_input_tokens_seen": 47837816, + "step": 763 + }, + { + "epoch": 2.5391014975041597, + "loss": 1.192439317703247, + "loss_ce": 0.000544731505215168, + "loss_iou": 0.390625, + "loss_num": 0.08251953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 47837816, + "step": 763 + }, + { + "epoch": 2.5424292845257903, + "grad_norm": 21.050203323364258, + "learning_rate": 5e-06, + "loss": 0.906, + "num_input_tokens_seen": 47899704, + "step": 764 + }, + { + "epoch": 2.5424292845257903, + "loss": 0.768609344959259, + "loss_ce": 0.000665017869323492, + "loss_iou": 0.29296875, + "loss_num": 0.036865234375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 47899704, + "step": 764 + }, + { + "epoch": 2.545757071547421, + "grad_norm": 10.072230339050293, + "learning_rate": 5e-06, + "loss": 0.5435, + "num_input_tokens_seen": 47961524, + "step": 765 + }, + { + "epoch": 2.545757071547421, + "loss": 0.4695216119289398, + "loss_ce": 0.0011378447525203228, + "loss_iou": 0.08349609375, + "loss_num": 0.060302734375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 47961524, + "step": 765 + }, + { + "epoch": 2.5490848585690515, + "grad_norm": 7.9047136306762695, + "learning_rate": 5e-06, + "loss": 0.6468, + "num_input_tokens_seen": 48020096, + "step": 766 + }, + { + "epoch": 2.5490848585690515, + "loss": 0.7101080417633057, + "loss_ce": 0.0008795225294306874, + "loss_iou": 0.10986328125, + "loss_num": 0.09814453125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 48020096, + "step": 766 + }, + { + "epoch": 2.552412645590682, + "grad_norm": 9.703407287597656, + "learning_rate": 5e-06, + "loss": 0.5771, + "num_input_tokens_seen": 48082036, + "step": 767 + }, + { + "epoch": 2.552412645590682, + "loss": 0.5914462208747864, + "loss_ce": 0.000625885440967977, + "loss_iou": 0.2119140625, + "loss_num": 0.033447265625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 48082036, + "step": 767 + }, + { + "epoch": 2.5557404326123128, + "grad_norm": 22.55636215209961, + "learning_rate": 5e-06, + "loss": 1.1754, + "num_input_tokens_seen": 48142276, + "step": 768 + }, + { + "epoch": 2.5557404326123128, + "loss": 0.9645742774009705, + "loss_ce": 0.00021879022824577987, + "loss_iou": 0.298828125, + "loss_num": 0.0732421875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 48142276, + "step": 768 + }, + { + "epoch": 2.5590682196339434, + "grad_norm": 23.570049285888672, + "learning_rate": 5e-06, + "loss": 0.7371, + "num_input_tokens_seen": 48204608, + "step": 769 + }, + { + "epoch": 2.5590682196339434, + "loss": 0.6285191774368286, + "loss_ce": 0.0007115625194273889, + "loss_iou": 0.220703125, + "loss_num": 0.037353515625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 48204608, + "step": 769 + }, + { + "epoch": 2.562396006655574, + "grad_norm": 30.504138946533203, + "learning_rate": 5e-06, + "loss": 0.7577, + "num_input_tokens_seen": 48268100, + "step": 770 + }, + { + "epoch": 2.562396006655574, + "loss": 0.6577082872390747, + "loss_ce": 0.00011549589544301853, + "loss_iou": 0.1953125, + "loss_num": 0.0537109375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 48268100, + "step": 770 + }, + { + "epoch": 2.5657237936772046, + "grad_norm": 25.44117546081543, + "learning_rate": 5e-06, + "loss": 0.9569, + "num_input_tokens_seen": 48330448, + "step": 771 + }, + { + "epoch": 2.5657237936772046, + "loss": 0.9606022834777832, + "loss_ce": 0.0006413036026060581, + "loss_iou": 0.318359375, + "loss_num": 0.064453125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 48330448, + "step": 771 + }, + { + "epoch": 2.569051580698835, + "grad_norm": 26.2667293548584, + "learning_rate": 5e-06, + "loss": 0.8804, + "num_input_tokens_seen": 48393420, + "step": 772 + }, + { + "epoch": 2.569051580698835, + "loss": 0.798115074634552, + "loss_ce": 0.004169770982116461, + "loss_iou": 0.291015625, + "loss_num": 0.042724609375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 48393420, + "step": 772 + }, + { + "epoch": 2.572379367720466, + "grad_norm": 11.151239395141602, + "learning_rate": 5e-06, + "loss": 0.6429, + "num_input_tokens_seen": 48455548, + "step": 773 + }, + { + "epoch": 2.572379367720466, + "loss": 0.5176081657409668, + "loss_ce": 0.0030817792285233736, + "loss_iou": 0.146484375, + "loss_num": 0.04443359375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 48455548, + "step": 773 + }, + { + "epoch": 2.5757071547420964, + "grad_norm": 17.321378707885742, + "learning_rate": 5e-06, + "loss": 0.9045, + "num_input_tokens_seen": 48519460, + "step": 774 + }, + { + "epoch": 2.5757071547420964, + "loss": 1.2099051475524902, + "loss_ce": 0.0021415799856185913, + "loss_iou": 0.4296875, + "loss_num": 0.0693359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 48519460, + "step": 774 + }, + { + "epoch": 2.579034941763727, + "grad_norm": 13.804886817932129, + "learning_rate": 5e-06, + "loss": 0.8553, + "num_input_tokens_seen": 48581736, + "step": 775 + }, + { + "epoch": 2.579034941763727, + "loss": 0.8312492966651917, + "loss_ce": 0.00019460837938822806, + "loss_iou": 0.279296875, + "loss_num": 0.05419921875, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 48581736, + "step": 775 + }, + { + "epoch": 2.5823627287853577, + "grad_norm": 28.333999633789062, + "learning_rate": 5e-06, + "loss": 0.7878, + "num_input_tokens_seen": 48645052, + "step": 776 + }, + { + "epoch": 2.5823627287853577, + "loss": 0.7494910359382629, + "loss_ce": 0.0012000187998637557, + "loss_iou": 0.26171875, + "loss_num": 0.044677734375, + "loss_xval": 0.75, + "num_input_tokens_seen": 48645052, + "step": 776 + }, + { + "epoch": 2.5856905158069883, + "grad_norm": 13.613855361938477, + "learning_rate": 5e-06, + "loss": 0.8614, + "num_input_tokens_seen": 48708828, + "step": 777 + }, + { + "epoch": 2.5856905158069883, + "loss": 0.8165592551231384, + "loss_ce": 0.0006412834627553821, + "loss_iou": 0.314453125, + "loss_num": 0.03759765625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 48708828, + "step": 777 + }, + { + "epoch": 2.589018302828619, + "grad_norm": 19.142955780029297, + "learning_rate": 5e-06, + "loss": 0.829, + "num_input_tokens_seen": 48771620, + "step": 778 + }, + { + "epoch": 2.589018302828619, + "loss": 1.0290508270263672, + "loss_ce": 0.0012187063694000244, + "loss_iou": 0.35546875, + "loss_num": 0.06396484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 48771620, + "step": 778 + }, + { + "epoch": 2.5923460898502495, + "grad_norm": 18.934194564819336, + "learning_rate": 5e-06, + "loss": 0.8152, + "num_input_tokens_seen": 48833540, + "step": 779 + }, + { + "epoch": 2.5923460898502495, + "loss": 0.8391866087913513, + "loss_ce": 0.001295918715186417, + "loss_iou": 0.240234375, + "loss_num": 0.0712890625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 48833540, + "step": 779 + }, + { + "epoch": 2.59567387687188, + "grad_norm": 18.425006866455078, + "learning_rate": 5e-06, + "loss": 0.8615, + "num_input_tokens_seen": 48896456, + "step": 780 + }, + { + "epoch": 2.59567387687188, + "loss": 0.8006330132484436, + "loss_ce": 0.00046212831512093544, + "loss_iou": 0.265625, + "loss_num": 0.0537109375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 48896456, + "step": 780 + }, + { + "epoch": 2.5990016638935107, + "grad_norm": 18.229389190673828, + "learning_rate": 5e-06, + "loss": 0.7638, + "num_input_tokens_seen": 48959876, + "step": 781 + }, + { + "epoch": 2.5990016638935107, + "loss": 0.8060437440872192, + "loss_ce": 0.00013553237658925354, + "loss_iou": 0.26171875, + "loss_num": 0.056640625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 48959876, + "step": 781 + }, + { + "epoch": 2.6023294509151413, + "grad_norm": 10.035086631774902, + "learning_rate": 5e-06, + "loss": 0.5497, + "num_input_tokens_seen": 49020532, + "step": 782 + }, + { + "epoch": 2.6023294509151413, + "loss": 0.5629553198814392, + "loss_ce": 8.909497410058975e-05, + "loss_iou": 0.12451171875, + "loss_num": 0.0625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 49020532, + "step": 782 + }, + { + "epoch": 2.605657237936772, + "grad_norm": 26.17829704284668, + "learning_rate": 5e-06, + "loss": 0.7714, + "num_input_tokens_seen": 49082484, + "step": 783 + }, + { + "epoch": 2.605657237936772, + "loss": 0.8501694202423096, + "loss_ce": 0.0005600237054750323, + "loss_iou": 0.318359375, + "loss_num": 0.042724609375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 49082484, + "step": 783 + }, + { + "epoch": 2.6089850249584026, + "grad_norm": 22.57184410095215, + "learning_rate": 5e-06, + "loss": 0.9777, + "num_input_tokens_seen": 49145952, + "step": 784 + }, + { + "epoch": 2.6089850249584026, + "loss": 0.7366130352020264, + "loss_ce": 4.0785289456835017e-05, + "loss_iou": 0.212890625, + "loss_num": 0.062255859375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 49145952, + "step": 784 + }, + { + "epoch": 2.612312811980033, + "grad_norm": 12.318331718444824, + "learning_rate": 5e-06, + "loss": 0.8321, + "num_input_tokens_seen": 49206352, + "step": 785 + }, + { + "epoch": 2.612312811980033, + "loss": 0.8576995134353638, + "loss_ce": 0.004183888901025057, + "loss_iou": 0.310546875, + "loss_num": 0.04638671875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 49206352, + "step": 785 + }, + { + "epoch": 2.615640599001664, + "grad_norm": 11.607172966003418, + "learning_rate": 5e-06, + "loss": 0.7514, + "num_input_tokens_seen": 49268404, + "step": 786 + }, + { + "epoch": 2.615640599001664, + "loss": 0.8183543086051941, + "loss_ce": 0.0019480856135487556, + "loss_iou": 0.28515625, + "loss_num": 0.049072265625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 49268404, + "step": 786 + }, + { + "epoch": 2.6189683860232944, + "grad_norm": 14.248104095458984, + "learning_rate": 5e-06, + "loss": 0.875, + "num_input_tokens_seen": 49332036, + "step": 787 + }, + { + "epoch": 2.6189683860232944, + "loss": 0.7565048336982727, + "loss_ce": 0.0011337447213009, + "loss_iou": 0.251953125, + "loss_num": 0.050537109375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 49332036, + "step": 787 + }, + { + "epoch": 2.622296173044925, + "grad_norm": 17.36334800720215, + "learning_rate": 5e-06, + "loss": 0.7014, + "num_input_tokens_seen": 49394740, + "step": 788 + }, + { + "epoch": 2.622296173044925, + "loss": 0.5740655064582825, + "loss_ce": 0.0005791678559035063, + "loss_iou": 0.185546875, + "loss_num": 0.040283203125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 49394740, + "step": 788 + }, + { + "epoch": 2.6256239600665556, + "grad_norm": 20.616777420043945, + "learning_rate": 5e-06, + "loss": 0.8368, + "num_input_tokens_seen": 49456944, + "step": 789 + }, + { + "epoch": 2.6256239600665556, + "loss": 0.8422057628631592, + "loss_ce": 0.00028675797511823475, + "loss_iou": 0.279296875, + "loss_num": 0.056396484375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 49456944, + "step": 789 + }, + { + "epoch": 2.6289517470881862, + "grad_norm": 19.112476348876953, + "learning_rate": 5e-06, + "loss": 0.9446, + "num_input_tokens_seen": 49520124, + "step": 790 + }, + { + "epoch": 2.6289517470881862, + "loss": 0.8822909593582153, + "loss_ce": 8.882155816536397e-05, + "loss_iou": 0.3203125, + "loss_num": 0.048095703125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 49520124, + "step": 790 + }, + { + "epoch": 2.632279534109817, + "grad_norm": 11.602209091186523, + "learning_rate": 5e-06, + "loss": 0.9648, + "num_input_tokens_seen": 49583060, + "step": 791 + }, + { + "epoch": 2.632279534109817, + "loss": 0.8904417753219604, + "loss_ce": 0.001037474605254829, + "loss_iou": 0.291015625, + "loss_num": 0.061279296875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 49583060, + "step": 791 + }, + { + "epoch": 2.6356073211314475, + "grad_norm": 19.73798179626465, + "learning_rate": 5e-06, + "loss": 0.9234, + "num_input_tokens_seen": 49643248, + "step": 792 + }, + { + "epoch": 2.6356073211314475, + "loss": 0.8780515193939209, + "loss_ce": 0.00012182131467852741, + "loss_iou": 0.296875, + "loss_num": 0.056640625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 49643248, + "step": 792 + }, + { + "epoch": 2.638935108153078, + "grad_norm": 31.99167251586914, + "learning_rate": 5e-06, + "loss": 1.2363, + "num_input_tokens_seen": 49706308, + "step": 793 + }, + { + "epoch": 2.638935108153078, + "loss": 1.3618634939193726, + "loss_ce": 4.7138204536167905e-05, + "loss_iou": 0.4609375, + "loss_num": 0.08837890625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 49706308, + "step": 793 + }, + { + "epoch": 2.6422628951747087, + "grad_norm": 27.228073120117188, + "learning_rate": 5e-06, + "loss": 0.9539, + "num_input_tokens_seen": 49769864, + "step": 794 + }, + { + "epoch": 2.6422628951747087, + "loss": 1.108590006828308, + "loss_ce": 0.00019164662808179855, + "loss_iou": 0.37890625, + "loss_num": 0.06982421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 49769864, + "step": 794 + }, + { + "epoch": 2.6455906821963393, + "grad_norm": 21.955646514892578, + "learning_rate": 5e-06, + "loss": 1.0233, + "num_input_tokens_seen": 49834096, + "step": 795 + }, + { + "epoch": 2.6455906821963393, + "loss": 0.9163686037063599, + "loss_ce": 0.00010885349911404774, + "loss_iou": 0.291015625, + "loss_num": 0.0673828125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 49834096, + "step": 795 + }, + { + "epoch": 2.64891846921797, + "grad_norm": 17.316287994384766, + "learning_rate": 5e-06, + "loss": 0.6946, + "num_input_tokens_seen": 49897404, + "step": 796 + }, + { + "epoch": 2.64891846921797, + "loss": 0.49226897954940796, + "loss_ce": 0.0015463390154764056, + "loss_iou": 0.12060546875, + "loss_num": 0.050048828125, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 49897404, + "step": 796 + }, + { + "epoch": 2.6522462562396005, + "grad_norm": 9.91407299041748, + "learning_rate": 5e-06, + "loss": 0.9557, + "num_input_tokens_seen": 49960132, + "step": 797 + }, + { + "epoch": 2.6522462562396005, + "loss": 1.2538728713989258, + "loss_ce": 0.0002108162734657526, + "loss_iou": 0.466796875, + "loss_num": 0.0634765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 49960132, + "step": 797 + }, + { + "epoch": 2.655574043261231, + "grad_norm": 13.048734664916992, + "learning_rate": 5e-06, + "loss": 0.9346, + "num_input_tokens_seen": 50021972, + "step": 798 + }, + { + "epoch": 2.655574043261231, + "loss": 0.8867144584655762, + "loss_ce": 0.00023978884564712644, + "loss_iou": 0.267578125, + "loss_num": 0.0703125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 50021972, + "step": 798 + }, + { + "epoch": 2.6589018302828618, + "grad_norm": 20.037155151367188, + "learning_rate": 5e-06, + "loss": 1.0502, + "num_input_tokens_seen": 50084912, + "step": 799 + }, + { + "epoch": 2.6589018302828618, + "loss": 1.0424444675445557, + "loss_ce": 0.00020819506607949734, + "loss_iou": 0.365234375, + "loss_num": 0.06201171875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 50084912, + "step": 799 + }, + { + "epoch": 2.6622296173044924, + "grad_norm": 23.562681198120117, + "learning_rate": 5e-06, + "loss": 1.143, + "num_input_tokens_seen": 50148712, + "step": 800 + }, + { + "epoch": 2.6622296173044924, + "loss": 1.1583603620529175, + "loss_ce": 0.001622087205760181, + "loss_iou": 0.375, + "loss_num": 0.0810546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 50148712, + "step": 800 + }, + { + "epoch": 2.665557404326123, + "grad_norm": 12.925877571105957, + "learning_rate": 5e-06, + "loss": 0.807, + "num_input_tokens_seen": 50210512, + "step": 801 + }, + { + "epoch": 2.665557404326123, + "loss": 1.0540846586227417, + "loss_ce": 0.0003736392827704549, + "loss_iou": 0.390625, + "loss_num": 0.054443359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 50210512, + "step": 801 + }, + { + "epoch": 2.6688851913477536, + "grad_norm": 11.798089027404785, + "learning_rate": 5e-06, + "loss": 0.9187, + "num_input_tokens_seen": 50274604, + "step": 802 + }, + { + "epoch": 2.6688851913477536, + "loss": 0.7706761956214905, + "loss_ce": 0.00041252042865380645, + "loss_iou": 0.26953125, + "loss_num": 0.046630859375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 50274604, + "step": 802 + }, + { + "epoch": 2.672212978369384, + "grad_norm": 9.151251792907715, + "learning_rate": 5e-06, + "loss": 0.5951, + "num_input_tokens_seen": 50336456, + "step": 803 + }, + { + "epoch": 2.672212978369384, + "loss": 0.6084632873535156, + "loss_ce": 6.486372876679525e-05, + "loss_iou": 0.212890625, + "loss_num": 0.03662109375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 50336456, + "step": 803 + }, + { + "epoch": 2.675540765391015, + "grad_norm": 66.70578002929688, + "learning_rate": 5e-06, + "loss": 0.7686, + "num_input_tokens_seen": 50400404, + "step": 804 + }, + { + "epoch": 2.675540765391015, + "loss": 0.7586301565170288, + "loss_ce": 8.528407488483936e-05, + "loss_iou": 0.29296875, + "loss_num": 0.034912109375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 50400404, + "step": 804 + }, + { + "epoch": 2.6788685524126454, + "grad_norm": 14.901700019836426, + "learning_rate": 5e-06, + "loss": 0.8289, + "num_input_tokens_seen": 50464096, + "step": 805 + }, + { + "epoch": 2.6788685524126454, + "loss": 0.9449521899223328, + "loss_ce": 0.0013486790703609586, + "loss_iou": 0.359375, + "loss_num": 0.04541015625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 50464096, + "step": 805 + }, + { + "epoch": 2.682196339434276, + "grad_norm": 16.723764419555664, + "learning_rate": 5e-06, + "loss": 0.6458, + "num_input_tokens_seen": 50526736, + "step": 806 + }, + { + "epoch": 2.682196339434276, + "loss": 0.7537314891815186, + "loss_ce": 0.0008018329390324652, + "loss_iou": 0.279296875, + "loss_num": 0.03857421875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 50526736, + "step": 806 + }, + { + "epoch": 2.6855241264559067, + "grad_norm": 15.801563262939453, + "learning_rate": 5e-06, + "loss": 0.6904, + "num_input_tokens_seen": 50589548, + "step": 807 + }, + { + "epoch": 2.6855241264559067, + "loss": 0.5793936252593994, + "loss_ce": 4.7907709813443944e-05, + "loss_iou": 0.16015625, + "loss_num": 0.052001953125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 50589548, + "step": 807 + }, + { + "epoch": 2.6888519134775377, + "grad_norm": 12.457715034484863, + "learning_rate": 5e-06, + "loss": 1.0248, + "num_input_tokens_seen": 50650376, + "step": 808 + }, + { + "epoch": 2.6888519134775377, + "loss": 1.0397467613220215, + "loss_ce": 0.00019596036872826517, + "loss_iou": 0.37890625, + "loss_num": 0.05615234375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 50650376, + "step": 808 + }, + { + "epoch": 2.6921797004991683, + "grad_norm": 23.163949966430664, + "learning_rate": 5e-06, + "loss": 0.6915, + "num_input_tokens_seen": 50713052, + "step": 809 + }, + { + "epoch": 2.6921797004991683, + "loss": 0.7694122791290283, + "loss_ce": 0.00012515315029304475, + "loss_iou": 0.28515625, + "loss_num": 0.0400390625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 50713052, + "step": 809 + }, + { + "epoch": 2.695507487520799, + "grad_norm": 18.702228546142578, + "learning_rate": 5e-06, + "loss": 0.9216, + "num_input_tokens_seen": 50776796, + "step": 810 + }, + { + "epoch": 2.695507487520799, + "loss": 0.9874943494796753, + "loss_ce": 0.00018971107783727348, + "loss_iou": 0.3671875, + "loss_num": 0.051025390625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 50776796, + "step": 810 + }, + { + "epoch": 2.6988352745424296, + "grad_norm": 9.986546516418457, + "learning_rate": 5e-06, + "loss": 0.9254, + "num_input_tokens_seen": 50839964, + "step": 811 + }, + { + "epoch": 2.6988352745424296, + "loss": 0.8390403985977173, + "loss_ce": 5.112246435601264e-05, + "loss_iou": 0.265625, + "loss_num": 0.0615234375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 50839964, + "step": 811 + }, + { + "epoch": 2.70216306156406, + "grad_norm": 15.212403297424316, + "learning_rate": 5e-06, + "loss": 0.8605, + "num_input_tokens_seen": 50901932, + "step": 812 + }, + { + "epoch": 2.70216306156406, + "loss": 0.7976114749908447, + "loss_ce": 0.0004923460073769093, + "loss_iou": 0.26953125, + "loss_num": 0.0517578125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 50901932, + "step": 812 + }, + { + "epoch": 2.7054908485856908, + "grad_norm": 25.891193389892578, + "learning_rate": 5e-06, + "loss": 0.8721, + "num_input_tokens_seen": 50964524, + "step": 813 + }, + { + "epoch": 2.7054908485856908, + "loss": 0.8218099474906921, + "loss_ce": 3.2617710530757904e-05, + "loss_iou": 0.1953125, + "loss_num": 0.08642578125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 50964524, + "step": 813 + }, + { + "epoch": 2.7088186356073214, + "grad_norm": 15.289278984069824, + "learning_rate": 5e-06, + "loss": 0.7168, + "num_input_tokens_seen": 51027040, + "step": 814 + }, + { + "epoch": 2.7088186356073214, + "loss": 0.5973207950592041, + "loss_ce": 0.0008241605246439576, + "loss_iou": 0.193359375, + "loss_num": 0.0419921875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 51027040, + "step": 814 + }, + { + "epoch": 2.712146422628952, + "grad_norm": 12.39792537689209, + "learning_rate": 5e-06, + "loss": 0.803, + "num_input_tokens_seen": 51089884, + "step": 815 + }, + { + "epoch": 2.712146422628952, + "loss": 0.7842392325401306, + "loss_ce": 0.0014480899553745985, + "loss_iou": 0.2353515625, + "loss_num": 0.0625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 51089884, + "step": 815 + }, + { + "epoch": 2.7154742096505826, + "grad_norm": 74.54549407958984, + "learning_rate": 5e-06, + "loss": 0.8014, + "num_input_tokens_seen": 51152684, + "step": 816 + }, + { + "epoch": 2.7154742096505826, + "loss": 0.8306292295455933, + "loss_ce": 0.000184907199582085, + "loss_iou": 0.279296875, + "loss_num": 0.054443359375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 51152684, + "step": 816 + }, + { + "epoch": 2.7188019966722132, + "grad_norm": 11.50522518157959, + "learning_rate": 5e-06, + "loss": 0.7829, + "num_input_tokens_seen": 51214500, + "step": 817 + }, + { + "epoch": 2.7188019966722132, + "loss": 0.881805419921875, + "loss_ce": 0.00045781210064888, + "loss_iou": 0.28515625, + "loss_num": 0.06201171875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 51214500, + "step": 817 + }, + { + "epoch": 2.722129783693844, + "grad_norm": 13.687999725341797, + "learning_rate": 5e-06, + "loss": 0.8651, + "num_input_tokens_seen": 51276604, + "step": 818 + }, + { + "epoch": 2.722129783693844, + "loss": 0.8142944574356079, + "loss_ce": 0.0005737675237469375, + "loss_iou": 0.28515625, + "loss_num": 0.048828125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 51276604, + "step": 818 + }, + { + "epoch": 2.7254575707154745, + "grad_norm": 5.8822712898254395, + "learning_rate": 5e-06, + "loss": 0.5399, + "num_input_tokens_seen": 51337268, + "step": 819 + }, + { + "epoch": 2.7254575707154745, + "loss": 0.4156995415687561, + "loss_ce": 0.00017218466382473707, + "loss_iou": 0.1279296875, + "loss_num": 0.03173828125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 51337268, + "step": 819 + }, + { + "epoch": 2.728785357737105, + "grad_norm": 10.577842712402344, + "learning_rate": 5e-06, + "loss": 0.5331, + "num_input_tokens_seen": 51398040, + "step": 820 + }, + { + "epoch": 2.728785357737105, + "loss": 0.44522571563720703, + "loss_ce": 0.0008897931547835469, + "loss_iou": 0.10986328125, + "loss_num": 0.044921875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 51398040, + "step": 820 + }, + { + "epoch": 2.7321131447587357, + "grad_norm": 14.673174858093262, + "learning_rate": 5e-06, + "loss": 0.9165, + "num_input_tokens_seen": 51459948, + "step": 821 + }, + { + "epoch": 2.7321131447587357, + "loss": 1.0592856407165527, + "loss_ce": 0.00020362591021694243, + "loss_iou": 0.32421875, + "loss_num": 0.08203125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 51459948, + "step": 821 + }, + { + "epoch": 2.7354409317803663, + "grad_norm": 10.287886619567871, + "learning_rate": 5e-06, + "loss": 0.8561, + "num_input_tokens_seen": 51524140, + "step": 822 + }, + { + "epoch": 2.7354409317803663, + "loss": 0.8826354742050171, + "loss_ce": 0.0007995132473297417, + "loss_iou": 0.2890625, + "loss_num": 0.060302734375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 51524140, + "step": 822 + }, + { + "epoch": 2.738768718801997, + "grad_norm": 9.983254432678223, + "learning_rate": 5e-06, + "loss": 0.823, + "num_input_tokens_seen": 51587172, + "step": 823 + }, + { + "epoch": 2.738768718801997, + "loss": 1.0295748710632324, + "loss_ce": 3.3884258300531656e-05, + "loss_iou": 0.3125, + "loss_num": 0.08056640625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 51587172, + "step": 823 + }, + { + "epoch": 2.7420965058236275, + "grad_norm": 10.106569290161133, + "learning_rate": 5e-06, + "loss": 0.8517, + "num_input_tokens_seen": 51649148, + "step": 824 + }, + { + "epoch": 2.7420965058236275, + "loss": 0.7791653275489807, + "loss_ce": 0.0006008880445733666, + "loss_iou": 0.30859375, + "loss_num": 0.031982421875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 51649148, + "step": 824 + }, + { + "epoch": 2.745424292845258, + "grad_norm": 20.371871948242188, + "learning_rate": 5e-06, + "loss": 1.0496, + "num_input_tokens_seen": 51712836, + "step": 825 + }, + { + "epoch": 2.745424292845258, + "loss": 0.855181097984314, + "loss_ce": 0.01924358680844307, + "loss_iou": 0.294921875, + "loss_num": 0.049560546875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 51712836, + "step": 825 + }, + { + "epoch": 2.7487520798668887, + "grad_norm": 12.943856239318848, + "learning_rate": 5e-06, + "loss": 0.8533, + "num_input_tokens_seen": 51775596, + "step": 826 + }, + { + "epoch": 2.7487520798668887, + "loss": 0.914795994758606, + "loss_ce": 0.0004892984288744628, + "loss_iou": 0.318359375, + "loss_num": 0.0556640625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 51775596, + "step": 826 + }, + { + "epoch": 2.7520798668885194, + "grad_norm": 8.180317878723145, + "learning_rate": 5e-06, + "loss": 0.6992, + "num_input_tokens_seen": 51838420, + "step": 827 + }, + { + "epoch": 2.7520798668885194, + "loss": 0.7848867177963257, + "loss_ce": 0.0020497734658420086, + "loss_iou": 0.26953125, + "loss_num": 0.049072265625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 51838420, + "step": 827 + }, + { + "epoch": 2.75540765391015, + "grad_norm": 15.182528495788574, + "learning_rate": 5e-06, + "loss": 0.8481, + "num_input_tokens_seen": 51901276, + "step": 828 + }, + { + "epoch": 2.75540765391015, + "loss": 0.7813282012939453, + "loss_ce": 0.002519632689654827, + "loss_iou": 0.21484375, + "loss_num": 0.06982421875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 51901276, + "step": 828 + }, + { + "epoch": 2.7587354409317806, + "grad_norm": 7.6279377937316895, + "learning_rate": 5e-06, + "loss": 0.6484, + "num_input_tokens_seen": 51962060, + "step": 829 + }, + { + "epoch": 2.7587354409317806, + "loss": 0.7650944590568542, + "loss_ce": 0.00203291280195117, + "loss_iou": 0.23046875, + "loss_num": 0.06005859375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 51962060, + "step": 829 + }, + { + "epoch": 2.762063227953411, + "grad_norm": 10.674771308898926, + "learning_rate": 5e-06, + "loss": 0.817, + "num_input_tokens_seen": 52025592, + "step": 830 + }, + { + "epoch": 2.762063227953411, + "loss": 0.7599319815635681, + "loss_ce": 0.00016639150271657854, + "loss_iou": 0.296875, + "loss_num": 0.033203125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 52025592, + "step": 830 + }, + { + "epoch": 2.765391014975042, + "grad_norm": 19.160572052001953, + "learning_rate": 5e-06, + "loss": 0.9251, + "num_input_tokens_seen": 52088076, + "step": 831 + }, + { + "epoch": 2.765391014975042, + "loss": 0.805517315864563, + "loss_ce": 0.0005856447969563305, + "loss_iou": 0.283203125, + "loss_num": 0.047607421875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 52088076, + "step": 831 + }, + { + "epoch": 2.7687188019966724, + "grad_norm": 6.1606125831604, + "learning_rate": 5e-06, + "loss": 0.5771, + "num_input_tokens_seen": 52150536, + "step": 832 + }, + { + "epoch": 2.7687188019966724, + "loss": 0.6053818464279175, + "loss_ce": 0.0006455372204072773, + "loss_iou": 0.1259765625, + "loss_num": 0.0703125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 52150536, + "step": 832 + }, + { + "epoch": 2.772046589018303, + "grad_norm": 22.167266845703125, + "learning_rate": 5e-06, + "loss": 0.9371, + "num_input_tokens_seen": 52213100, + "step": 833 + }, + { + "epoch": 2.772046589018303, + "loss": 0.9295365810394287, + "loss_ce": 0.00021526089403778315, + "loss_iou": 0.302734375, + "loss_num": 0.0654296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 52213100, + "step": 833 + }, + { + "epoch": 2.7753743760399336, + "grad_norm": 10.496980667114258, + "learning_rate": 5e-06, + "loss": 0.6609, + "num_input_tokens_seen": 52276464, + "step": 834 + }, + { + "epoch": 2.7753743760399336, + "loss": 0.5460642576217651, + "loss_ce": 0.00016583751130383462, + "loss_iou": 0.2197265625, + "loss_num": 0.021240234375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 52276464, + "step": 834 + }, + { + "epoch": 2.7787021630615643, + "grad_norm": 10.732155799865723, + "learning_rate": 5e-06, + "loss": 0.8073, + "num_input_tokens_seen": 52340528, + "step": 835 + }, + { + "epoch": 2.7787021630615643, + "loss": 0.6755124926567078, + "loss_ce": 0.0002195164270233363, + "loss_iou": 0.197265625, + "loss_num": 0.055908203125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 52340528, + "step": 835 + }, + { + "epoch": 2.782029950083195, + "grad_norm": 18.337326049804688, + "learning_rate": 5e-06, + "loss": 0.7644, + "num_input_tokens_seen": 52403600, + "step": 836 + }, + { + "epoch": 2.782029950083195, + "loss": 0.5846283435821533, + "loss_ce": 0.0003998374450020492, + "loss_iou": 0.1669921875, + "loss_num": 0.0498046875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 52403600, + "step": 836 + }, + { + "epoch": 2.7853577371048255, + "grad_norm": 68.70188903808594, + "learning_rate": 5e-06, + "loss": 0.884, + "num_input_tokens_seen": 52466592, + "step": 837 + }, + { + "epoch": 2.7853577371048255, + "loss": 1.1231420040130615, + "loss_ce": 0.0011938156094402075, + "loss_iou": 0.384765625, + "loss_num": 0.0703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 52466592, + "step": 837 + }, + { + "epoch": 2.788685524126456, + "grad_norm": 35.47993850708008, + "learning_rate": 5e-06, + "loss": 0.9647, + "num_input_tokens_seen": 52529452, + "step": 838 + }, + { + "epoch": 2.788685524126456, + "loss": 0.9949101209640503, + "loss_ce": 0.0007694980013184249, + "loss_iou": 0.3203125, + "loss_num": 0.07080078125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 52529452, + "step": 838 + }, + { + "epoch": 2.7920133111480867, + "grad_norm": 21.51439094543457, + "learning_rate": 5e-06, + "loss": 0.8653, + "num_input_tokens_seen": 52592540, + "step": 839 + }, + { + "epoch": 2.7920133111480867, + "loss": 0.947022020816803, + "loss_ce": 0.0014653451507911086, + "loss_iou": 0.345703125, + "loss_num": 0.05078125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 52592540, + "step": 839 + }, + { + "epoch": 2.7953410981697173, + "grad_norm": 16.271461486816406, + "learning_rate": 5e-06, + "loss": 0.7983, + "num_input_tokens_seen": 52654704, + "step": 840 + }, + { + "epoch": 2.7953410981697173, + "loss": 0.980492353439331, + "loss_ce": 0.0014884801348671317, + "loss_iou": 0.28125, + "loss_num": 0.08349609375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 52654704, + "step": 840 + }, + { + "epoch": 2.798668885191348, + "grad_norm": 10.07279109954834, + "learning_rate": 5e-06, + "loss": 0.5959, + "num_input_tokens_seen": 52716292, + "step": 841 + }, + { + "epoch": 2.798668885191348, + "loss": 0.4731648564338684, + "loss_ce": 0.000630666094366461, + "loss_iou": 0.1494140625, + "loss_num": 0.034912109375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 52716292, + "step": 841 + }, + { + "epoch": 2.8019966722129785, + "grad_norm": 13.416818618774414, + "learning_rate": 5e-06, + "loss": 0.9959, + "num_input_tokens_seen": 52780908, + "step": 842 + }, + { + "epoch": 2.8019966722129785, + "loss": 1.2821091413497925, + "loss_ce": 0.0006150341359898448, + "loss_iou": 0.42578125, + "loss_num": 0.0859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 52780908, + "step": 842 + }, + { + "epoch": 2.805324459234609, + "grad_norm": 23.089929580688477, + "learning_rate": 5e-06, + "loss": 0.6385, + "num_input_tokens_seen": 52842208, + "step": 843 + }, + { + "epoch": 2.805324459234609, + "loss": 0.59052574634552, + "loss_ce": 0.0006820209673605859, + "loss_iou": 0.1962890625, + "loss_num": 0.03955078125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 52842208, + "step": 843 + }, + { + "epoch": 2.8086522462562398, + "grad_norm": 18.833816528320312, + "learning_rate": 5e-06, + "loss": 0.9746, + "num_input_tokens_seen": 52904996, + "step": 844 + }, + { + "epoch": 2.8086522462562398, + "loss": 1.0083526372909546, + "loss_ce": 5.187302303966135e-05, + "loss_iou": 0.388671875, + "loss_num": 0.046142578125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 52904996, + "step": 844 + }, + { + "epoch": 2.8119800332778704, + "grad_norm": 14.17634105682373, + "learning_rate": 5e-06, + "loss": 0.884, + "num_input_tokens_seen": 52969668, + "step": 845 + }, + { + "epoch": 2.8119800332778704, + "loss": 1.1289470195770264, + "loss_ce": 0.0019939455669373274, + "loss_iou": 0.439453125, + "loss_num": 0.050048828125, + "loss_xval": 1.125, + "num_input_tokens_seen": 52969668, + "step": 845 + }, + { + "epoch": 2.815307820299501, + "grad_norm": 23.1187801361084, + "learning_rate": 5e-06, + "loss": 0.7, + "num_input_tokens_seen": 53031812, + "step": 846 + }, + { + "epoch": 2.815307820299501, + "loss": 0.7373853921890259, + "loss_ce": 8.065036672633141e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.048583984375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 53031812, + "step": 846 + }, + { + "epoch": 2.8186356073211316, + "grad_norm": 9.363740921020508, + "learning_rate": 5e-06, + "loss": 0.9656, + "num_input_tokens_seen": 53095744, + "step": 847 + }, + { + "epoch": 2.8186356073211316, + "loss": 1.1773841381072998, + "loss_ce": 0.0018469768110662699, + "loss_iou": 0.384765625, + "loss_num": 0.08154296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 53095744, + "step": 847 + }, + { + "epoch": 2.821963394342762, + "grad_norm": 11.104792594909668, + "learning_rate": 5e-06, + "loss": 0.8244, + "num_input_tokens_seen": 53158744, + "step": 848 + }, + { + "epoch": 2.821963394342762, + "loss": 0.9474537372589111, + "loss_ce": 0.0006763727869838476, + "loss_iou": 0.2578125, + "loss_num": 0.08642578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 53158744, + "step": 848 + }, + { + "epoch": 2.825291181364393, + "grad_norm": 12.321479797363281, + "learning_rate": 5e-06, + "loss": 0.9694, + "num_input_tokens_seen": 53221352, + "step": 849 + }, + { + "epoch": 2.825291181364393, + "loss": 0.9421533346176147, + "loss_ce": 0.0009911877568811178, + "loss_iou": 0.26953125, + "loss_num": 0.0810546875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 53221352, + "step": 849 + }, + { + "epoch": 2.8286189683860234, + "grad_norm": 12.530994415283203, + "learning_rate": 5e-06, + "loss": 0.9866, + "num_input_tokens_seen": 53285452, + "step": 850 + }, + { + "epoch": 2.8286189683860234, + "loss": 1.0217738151550293, + "loss_ce": 0.001021851203404367, + "loss_iou": 0.375, + "loss_num": 0.05419921875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 53285452, + "step": 850 + }, + { + "epoch": 2.831946755407654, + "grad_norm": 15.440099716186523, + "learning_rate": 5e-06, + "loss": 0.8441, + "num_input_tokens_seen": 53346748, + "step": 851 + }, + { + "epoch": 2.831946755407654, + "loss": 0.7872898578643799, + "loss_ce": 0.00018045374599751085, + "loss_iou": 0.2197265625, + "loss_num": 0.0693359375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 53346748, + "step": 851 + }, + { + "epoch": 2.8352745424292847, + "grad_norm": 28.69334602355957, + "learning_rate": 5e-06, + "loss": 0.8573, + "num_input_tokens_seen": 53409872, + "step": 852 + }, + { + "epoch": 2.8352745424292847, + "loss": 0.8631025552749634, + "loss_ce": 0.002262737834826112, + "loss_iou": 0.26171875, + "loss_num": 0.0673828125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 53409872, + "step": 852 + }, + { + "epoch": 2.8386023294509153, + "grad_norm": 12.272283554077148, + "learning_rate": 5e-06, + "loss": 0.8724, + "num_input_tokens_seen": 53471764, + "step": 853 + }, + { + "epoch": 2.8386023294509153, + "loss": 1.037917971611023, + "loss_ce": 0.0020293924026191235, + "loss_iou": 0.3125, + "loss_num": 0.08203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 53471764, + "step": 853 + }, + { + "epoch": 2.841930116472546, + "grad_norm": 19.907730102539062, + "learning_rate": 5e-06, + "loss": 0.8915, + "num_input_tokens_seen": 53535256, + "step": 854 + }, + { + "epoch": 2.841930116472546, + "loss": 1.0508040189743042, + "loss_ce": 0.0014875233173370361, + "loss_iou": 0.40234375, + "loss_num": 0.049560546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 53535256, + "step": 854 + }, + { + "epoch": 2.8452579034941765, + "grad_norm": 16.519927978515625, + "learning_rate": 5e-06, + "loss": 0.8323, + "num_input_tokens_seen": 53598600, + "step": 855 + }, + { + "epoch": 2.8452579034941765, + "loss": 0.9220311045646667, + "loss_ce": 0.0006444402388297021, + "loss_iou": 0.306640625, + "loss_num": 0.061279296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 53598600, + "step": 855 + }, + { + "epoch": 2.848585690515807, + "grad_norm": 21.156627655029297, + "learning_rate": 5e-06, + "loss": 0.8297, + "num_input_tokens_seen": 53660192, + "step": 856 + }, + { + "epoch": 2.848585690515807, + "loss": 0.6860744953155518, + "loss_ce": 3.937574365409091e-05, + "loss_iou": 0.25390625, + "loss_num": 0.03564453125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 53660192, + "step": 856 + }, + { + "epoch": 2.8519134775374377, + "grad_norm": 15.835363388061523, + "learning_rate": 5e-06, + "loss": 1.1557, + "num_input_tokens_seen": 53723952, + "step": 857 + }, + { + "epoch": 2.8519134775374377, + "loss": 1.1079440116882324, + "loss_ce": 0.0005221446044743061, + "loss_iou": 0.375, + "loss_num": 0.0712890625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 53723952, + "step": 857 + }, + { + "epoch": 2.8552412645590683, + "grad_norm": 9.046875, + "learning_rate": 5e-06, + "loss": 0.8038, + "num_input_tokens_seen": 53785616, + "step": 858 + }, + { + "epoch": 2.8552412645590683, + "loss": 0.8684693574905396, + "loss_ce": 0.0004273601807653904, + "loss_iou": 0.326171875, + "loss_num": 0.04345703125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 53785616, + "step": 858 + }, + { + "epoch": 2.858569051580699, + "grad_norm": 46.66500473022461, + "learning_rate": 5e-06, + "loss": 1.2536, + "num_input_tokens_seen": 53849740, + "step": 859 + }, + { + "epoch": 2.858569051580699, + "loss": 1.047816276550293, + "loss_ce": 0.0068005844950675964, + "loss_iou": 0.3828125, + "loss_num": 0.05517578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 53849740, + "step": 859 + }, + { + "epoch": 2.8618968386023296, + "grad_norm": 25.487953186035156, + "learning_rate": 5e-06, + "loss": 0.8454, + "num_input_tokens_seen": 53911752, + "step": 860 + }, + { + "epoch": 2.8618968386023296, + "loss": 0.7410845160484314, + "loss_ce": 0.00017875817138701677, + "loss_iou": 0.208984375, + "loss_num": 0.06494140625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 53911752, + "step": 860 + }, + { + "epoch": 2.86522462562396, + "grad_norm": 8.039831161499023, + "learning_rate": 5e-06, + "loss": 0.4574, + "num_input_tokens_seen": 53973596, + "step": 861 + }, + { + "epoch": 2.86522462562396, + "loss": 0.4054723381996155, + "loss_ce": 0.00019890097610186785, + "loss_iou": 0.107421875, + "loss_num": 0.038330078125, + "loss_xval": 0.40625, + "num_input_tokens_seen": 53973596, + "step": 861 + }, + { + "epoch": 2.868552412645591, + "grad_norm": 12.18427562713623, + "learning_rate": 5e-06, + "loss": 0.777, + "num_input_tokens_seen": 54037804, + "step": 862 + }, + { + "epoch": 2.868552412645591, + "loss": 0.7093939781188965, + "loss_ce": 0.004559976048767567, + "loss_iou": 0.251953125, + "loss_num": 0.039794921875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 54037804, + "step": 862 + }, + { + "epoch": 2.8718801996672214, + "grad_norm": 11.70964527130127, + "learning_rate": 5e-06, + "loss": 0.8396, + "num_input_tokens_seen": 54099912, + "step": 863 + }, + { + "epoch": 2.8718801996672214, + "loss": 0.9924643635749817, + "loss_ce": 0.0005210142116993666, + "loss_iou": 0.302734375, + "loss_num": 0.0771484375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 54099912, + "step": 863 + }, + { + "epoch": 2.875207986688852, + "grad_norm": 12.803565979003906, + "learning_rate": 5e-06, + "loss": 0.7378, + "num_input_tokens_seen": 54162264, + "step": 864 + }, + { + "epoch": 2.875207986688852, + "loss": 0.7761240005493164, + "loss_ce": 0.002198257017880678, + "loss_iou": 0.2216796875, + "loss_num": 0.06591796875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 54162264, + "step": 864 + }, + { + "epoch": 2.8785357737104826, + "grad_norm": 11.305477142333984, + "learning_rate": 5e-06, + "loss": 0.7493, + "num_input_tokens_seen": 54225144, + "step": 865 + }, + { + "epoch": 2.8785357737104826, + "loss": 0.8462469577789307, + "loss_ce": 0.0005437912186607718, + "loss_iou": 0.2890625, + "loss_num": 0.053466796875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 54225144, + "step": 865 + }, + { + "epoch": 2.8818635607321132, + "grad_norm": 12.183488845825195, + "learning_rate": 5e-06, + "loss": 1.0525, + "num_input_tokens_seen": 54286592, + "step": 866 + }, + { + "epoch": 2.8818635607321132, + "loss": 1.0839440822601318, + "loss_ce": 0.0009362882701680064, + "loss_iou": 0.3359375, + "loss_num": 0.08203125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 54286592, + "step": 866 + }, + { + "epoch": 2.885191347753744, + "grad_norm": 10.725017547607422, + "learning_rate": 5e-06, + "loss": 1.0308, + "num_input_tokens_seen": 54348900, + "step": 867 + }, + { + "epoch": 2.885191347753744, + "loss": 0.9614525437355042, + "loss_ce": 2.6763778805616312e-05, + "loss_iou": 0.328125, + "loss_num": 0.06103515625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 54348900, + "step": 867 + }, + { + "epoch": 2.8885191347753745, + "grad_norm": 12.298053741455078, + "learning_rate": 5e-06, + "loss": 0.7657, + "num_input_tokens_seen": 54411716, + "step": 868 + }, + { + "epoch": 2.8885191347753745, + "loss": 0.7437622547149658, + "loss_ce": 0.0010864399373531342, + "loss_iou": 0.2431640625, + "loss_num": 0.051025390625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 54411716, + "step": 868 + }, + { + "epoch": 2.891846921797005, + "grad_norm": 29.771516799926758, + "learning_rate": 5e-06, + "loss": 0.8699, + "num_input_tokens_seen": 54474996, + "step": 869 + }, + { + "epoch": 2.891846921797005, + "loss": 0.875381350517273, + "loss_ce": 0.0006255026091821492, + "loss_iou": 0.296875, + "loss_num": 0.05615234375, + "loss_xval": 0.875, + "num_input_tokens_seen": 54474996, + "step": 869 + }, + { + "epoch": 2.8951747088186357, + "grad_norm": 22.231077194213867, + "learning_rate": 5e-06, + "loss": 0.6807, + "num_input_tokens_seen": 54538292, + "step": 870 + }, + { + "epoch": 2.8951747088186357, + "loss": 0.6149954795837402, + "loss_ce": 0.0007987120770849288, + "loss_iou": 0.2265625, + "loss_num": 0.0322265625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 54538292, + "step": 870 + }, + { + "epoch": 2.8985024958402663, + "grad_norm": 13.072921752929688, + "learning_rate": 5e-06, + "loss": 0.9311, + "num_input_tokens_seen": 54600360, + "step": 871 + }, + { + "epoch": 2.8985024958402663, + "loss": 1.268268346786499, + "loss_ce": 0.0002019708917941898, + "loss_iou": 0.390625, + "loss_num": 0.09716796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 54600360, + "step": 871 + }, + { + "epoch": 2.901830282861897, + "grad_norm": 12.388557434082031, + "learning_rate": 5e-06, + "loss": 0.8199, + "num_input_tokens_seen": 54662656, + "step": 872 + }, + { + "epoch": 2.901830282861897, + "loss": 0.6191840767860413, + "loss_ce": 4.344484841567464e-05, + "loss_iou": 0.203125, + "loss_num": 0.04248046875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 54662656, + "step": 872 + }, + { + "epoch": 2.9051580698835275, + "grad_norm": 7.256121635437012, + "learning_rate": 5e-06, + "loss": 0.7057, + "num_input_tokens_seen": 54723256, + "step": 873 + }, + { + "epoch": 2.9051580698835275, + "loss": 0.5848498344421387, + "loss_ce": 0.001109591918066144, + "loss_iou": 0.1484375, + "loss_num": 0.05712890625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 54723256, + "step": 873 + }, + { + "epoch": 2.908485856905158, + "grad_norm": 30.038572311401367, + "learning_rate": 5e-06, + "loss": 1.0476, + "num_input_tokens_seen": 54786872, + "step": 874 + }, + { + "epoch": 2.908485856905158, + "loss": 0.7975156903266907, + "loss_ce": 0.000274513236945495, + "loss_iou": 0.24609375, + "loss_num": 0.061279296875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 54786872, + "step": 874 + }, + { + "epoch": 2.9118136439267888, + "grad_norm": 27.706817626953125, + "learning_rate": 5e-06, + "loss": 0.8176, + "num_input_tokens_seen": 54848424, + "step": 875 + }, + { + "epoch": 2.9118136439267888, + "loss": 0.6453001499176025, + "loss_ce": 0.0006468780338764191, + "loss_iou": 0.1865234375, + "loss_num": 0.05419921875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 54848424, + "step": 875 + }, + { + "epoch": 2.9151414309484194, + "grad_norm": 16.334976196289062, + "learning_rate": 5e-06, + "loss": 0.828, + "num_input_tokens_seen": 54910524, + "step": 876 + }, + { + "epoch": 2.9151414309484194, + "loss": 0.9851783514022827, + "loss_ce": 7.095631735865027e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0732421875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 54910524, + "step": 876 + }, + { + "epoch": 2.91846921797005, + "grad_norm": 26.634248733520508, + "learning_rate": 5e-06, + "loss": 0.8181, + "num_input_tokens_seen": 54973516, + "step": 877 + }, + { + "epoch": 2.91846921797005, + "loss": 0.83323734998703, + "loss_ce": 0.00035160701372660697, + "loss_iou": 0.2890625, + "loss_num": 0.05078125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 54973516, + "step": 877 + }, + { + "epoch": 2.9217970049916806, + "grad_norm": 22.770780563354492, + "learning_rate": 5e-06, + "loss": 0.897, + "num_input_tokens_seen": 55034692, + "step": 878 + }, + { + "epoch": 2.9217970049916806, + "loss": 0.9181663990020752, + "loss_ce": 0.00019761671137530357, + "loss_iou": 0.328125, + "loss_num": 0.05224609375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 55034692, + "step": 878 + }, + { + "epoch": 2.925124792013311, + "grad_norm": 14.436359405517578, + "learning_rate": 5e-06, + "loss": 0.7651, + "num_input_tokens_seen": 55096088, + "step": 879 + }, + { + "epoch": 2.925124792013311, + "loss": 0.8270174860954285, + "loss_ce": 0.00011315852316329256, + "loss_iou": 0.255859375, + "loss_num": 0.06298828125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 55096088, + "step": 879 + }, + { + "epoch": 2.928452579034942, + "grad_norm": 21.74774742126465, + "learning_rate": 5e-06, + "loss": 0.9191, + "num_input_tokens_seen": 55159628, + "step": 880 + }, + { + "epoch": 2.928452579034942, + "loss": 0.9564664363861084, + "loss_ce": 0.0008694813586771488, + "loss_iou": 0.3359375, + "loss_num": 0.056396484375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 55159628, + "step": 880 + }, + { + "epoch": 2.9317803660565724, + "grad_norm": 13.85302448272705, + "learning_rate": 5e-06, + "loss": 0.6639, + "num_input_tokens_seen": 55222304, + "step": 881 + }, + { + "epoch": 2.9317803660565724, + "loss": 0.6171444654464722, + "loss_ce": 0.0015438641421496868, + "loss_iou": 0.2060546875, + "loss_num": 0.04052734375, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 55222304, + "step": 881 + }, + { + "epoch": 2.935108153078203, + "grad_norm": 15.573785781860352, + "learning_rate": 5e-06, + "loss": 0.9698, + "num_input_tokens_seen": 55285600, + "step": 882 + }, + { + "epoch": 2.935108153078203, + "loss": 1.1866347789764404, + "loss_ce": 0.0001112837198888883, + "loss_iou": 0.44140625, + "loss_num": 0.060791015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 55285600, + "step": 882 + }, + { + "epoch": 2.9384359400998337, + "grad_norm": 9.72053337097168, + "learning_rate": 5e-06, + "loss": 1.011, + "num_input_tokens_seen": 55349256, + "step": 883 + }, + { + "epoch": 2.9384359400998337, + "loss": 0.9649059176445007, + "loss_ce": 0.0007946036057546735, + "loss_iou": 0.3125, + "loss_num": 0.06787109375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 55349256, + "step": 883 + }, + { + "epoch": 2.9417637271214643, + "grad_norm": 40.047515869140625, + "learning_rate": 5e-06, + "loss": 0.9117, + "num_input_tokens_seen": 55413708, + "step": 884 + }, + { + "epoch": 2.9417637271214643, + "loss": 0.8685333132743835, + "loss_ce": 0.0026885762345045805, + "loss_iou": 0.28125, + "loss_num": 0.060546875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 55413708, + "step": 884 + }, + { + "epoch": 2.945091514143095, + "grad_norm": 29.746788024902344, + "learning_rate": 5e-06, + "loss": 1.1525, + "num_input_tokens_seen": 55478204, + "step": 885 + }, + { + "epoch": 2.945091514143095, + "loss": 1.2547807693481445, + "loss_ce": 0.00624566525220871, + "loss_iou": 0.453125, + "loss_num": 0.06884765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 55478204, + "step": 885 + }, + { + "epoch": 2.9484193011647255, + "grad_norm": 14.427910804748535, + "learning_rate": 5e-06, + "loss": 0.8744, + "num_input_tokens_seen": 55540140, + "step": 886 + }, + { + "epoch": 2.9484193011647255, + "loss": 0.9295307993888855, + "loss_ce": 0.0006977797020226717, + "loss_iou": 0.302734375, + "loss_num": 0.064453125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 55540140, + "step": 886 + }, + { + "epoch": 2.951747088186356, + "grad_norm": 13.86884593963623, + "learning_rate": 5e-06, + "loss": 0.7577, + "num_input_tokens_seen": 55602724, + "step": 887 + }, + { + "epoch": 2.951747088186356, + "loss": 0.9324434399604797, + "loss_ce": 0.0006807436002418399, + "loss_iou": 0.314453125, + "loss_num": 0.060546875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 55602724, + "step": 887 + }, + { + "epoch": 2.9550748752079867, + "grad_norm": 14.230158805847168, + "learning_rate": 5e-06, + "loss": 1.1293, + "num_input_tokens_seen": 55667092, + "step": 888 + }, + { + "epoch": 2.9550748752079867, + "loss": 1.015046238899231, + "loss_ce": 0.0013743427116423845, + "loss_iou": 0.349609375, + "loss_num": 0.06298828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 55667092, + "step": 888 + }, + { + "epoch": 2.9584026622296173, + "grad_norm": 13.532299041748047, + "learning_rate": 5e-06, + "loss": 0.7976, + "num_input_tokens_seen": 55729756, + "step": 889 + }, + { + "epoch": 2.9584026622296173, + "loss": 0.7759445905685425, + "loss_ce": 0.0006149307591840625, + "loss_iou": 0.2734375, + "loss_num": 0.04541015625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 55729756, + "step": 889 + }, + { + "epoch": 2.961730449251248, + "grad_norm": 15.530501365661621, + "learning_rate": 5e-06, + "loss": 0.6693, + "num_input_tokens_seen": 55792828, + "step": 890 + }, + { + "epoch": 2.961730449251248, + "loss": 0.7313098907470703, + "loss_ce": 0.0003528632805682719, + "loss_iou": 0.2041015625, + "loss_num": 0.06494140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 55792828, + "step": 890 + }, + { + "epoch": 2.9650582362728786, + "grad_norm": 15.952990531921387, + "learning_rate": 5e-06, + "loss": 0.8108, + "num_input_tokens_seen": 55854952, + "step": 891 + }, + { + "epoch": 2.9650582362728786, + "loss": 0.6857528686523438, + "loss_ce": 0.0009384113945998251, + "loss_iou": 0.23828125, + "loss_num": 0.041748046875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 55854952, + "step": 891 + }, + { + "epoch": 2.968386023294509, + "grad_norm": 7.85128116607666, + "learning_rate": 5e-06, + "loss": 0.8289, + "num_input_tokens_seen": 55917956, + "step": 892 + }, + { + "epoch": 2.968386023294509, + "loss": 0.668018102645874, + "loss_ce": 4.93402112624608e-05, + "loss_iou": 0.23046875, + "loss_num": 0.04150390625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 55917956, + "step": 892 + }, + { + "epoch": 2.97171381031614, + "grad_norm": 17.0181884765625, + "learning_rate": 5e-06, + "loss": 0.6734, + "num_input_tokens_seen": 55979892, + "step": 893 + }, + { + "epoch": 2.97171381031614, + "loss": 0.6943376064300537, + "loss_ce": 0.0009782593697309494, + "loss_iou": 0.201171875, + "loss_num": 0.05810546875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 55979892, + "step": 893 + }, + { + "epoch": 2.9750415973377704, + "grad_norm": 24.690635681152344, + "learning_rate": 5e-06, + "loss": 0.7435, + "num_input_tokens_seen": 56042384, + "step": 894 + }, + { + "epoch": 2.9750415973377704, + "loss": 0.47268909215927124, + "loss_ce": 0.00015488307690247893, + "loss_iou": 0.158203125, + "loss_num": 0.031494140625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 56042384, + "step": 894 + }, + { + "epoch": 2.978369384359401, + "grad_norm": 18.475412368774414, + "learning_rate": 5e-06, + "loss": 0.8394, + "num_input_tokens_seen": 56104388, + "step": 895 + }, + { + "epoch": 2.978369384359401, + "loss": 0.7747071981430054, + "loss_ce": 4.900360363535583e-05, + "loss_iou": 0.17578125, + "loss_num": 0.0849609375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 56104388, + "step": 895 + }, + { + "epoch": 2.9816971713810316, + "grad_norm": 10.097528457641602, + "learning_rate": 5e-06, + "loss": 0.5897, + "num_input_tokens_seen": 56167044, + "step": 896 + }, + { + "epoch": 2.9816971713810316, + "loss": 0.6344481706619263, + "loss_ce": 0.0010939212515950203, + "loss_iou": 0.1767578125, + "loss_num": 0.055908203125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 56167044, + "step": 896 + }, + { + "epoch": 2.9850249584026622, + "grad_norm": 15.803515434265137, + "learning_rate": 5e-06, + "loss": 0.623, + "num_input_tokens_seen": 56229968, + "step": 897 + }, + { + "epoch": 2.9850249584026622, + "loss": 0.4044834077358246, + "loss_ce": 0.00033913765219040215, + "loss_iou": 0.1103515625, + "loss_num": 0.036865234375, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 56229968, + "step": 897 + }, + { + "epoch": 2.988352745424293, + "grad_norm": 10.865888595581055, + "learning_rate": 5e-06, + "loss": 0.706, + "num_input_tokens_seen": 56293428, + "step": 898 + }, + { + "epoch": 2.988352745424293, + "loss": 0.6543700695037842, + "loss_ce": 1.2155430340499151e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.05029296875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 56293428, + "step": 898 + }, + { + "epoch": 2.9916805324459235, + "grad_norm": 10.439217567443848, + "learning_rate": 5e-06, + "loss": 0.9566, + "num_input_tokens_seen": 56353876, + "step": 899 + }, + { + "epoch": 2.9916805324459235, + "loss": 0.7114546298980713, + "loss_ce": 2.8855994969489984e-05, + "loss_iou": 0.2451171875, + "loss_num": 0.044189453125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 56353876, + "step": 899 + }, + { + "epoch": 2.995008319467554, + "grad_norm": 31.885540008544922, + "learning_rate": 5e-06, + "loss": 1.0362, + "num_input_tokens_seen": 56418016, + "step": 900 + }, + { + "epoch": 2.995008319467554, + "loss": 1.0725525617599487, + "loss_ce": 0.00028691417537629604, + "loss_iou": 0.359375, + "loss_num": 0.0712890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 56418016, + "step": 900 + }, + { + "epoch": 2.9983361064891847, + "grad_norm": 31.542757034301758, + "learning_rate": 5e-06, + "loss": 0.915, + "num_input_tokens_seen": 56479904, + "step": 901 + }, + { + "epoch": 2.9983361064891847, + "loss": 1.0275923013687134, + "loss_ce": 0.0026899336371570826, + "loss_iou": 0.35546875, + "loss_num": 0.06298828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 56479904, + "step": 901 + }, + { + "epoch": 2.9983361064891847, + "loss": 0.8745320439338684, + "loss_ce": 0.00026448973221704364, + "loss_iou": 0.271484375, + "loss_num": 0.06689453125, + "loss_xval": 0.875, + "num_input_tokens_seen": 56512420, + "step": 901 + }, + { + "epoch": 3.0016638935108153, + "grad_norm": 20.686283111572266, + "learning_rate": 5e-06, + "loss": 0.9452, + "num_input_tokens_seen": 56544136, + "step": 902 + }, + { + "epoch": 3.0016638935108153, + "loss": 1.015965461730957, + "loss_ce": 0.0008286432130262256, + "loss_iou": 0.35546875, + "loss_num": 0.060791015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 56544136, + "step": 902 + }, + { + "epoch": 3.004991680532446, + "grad_norm": 30.531023025512695, + "learning_rate": 5e-06, + "loss": 0.9768, + "num_input_tokens_seen": 56606324, + "step": 903 + }, + { + "epoch": 3.004991680532446, + "loss": 0.9323800802230835, + "loss_ce": 0.00012908241478726268, + "loss_iou": 0.326171875, + "loss_num": 0.05615234375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 56606324, + "step": 903 + }, + { + "epoch": 3.0083194675540765, + "grad_norm": 13.145706176757812, + "learning_rate": 5e-06, + "loss": 0.7993, + "num_input_tokens_seen": 56668452, + "step": 904 + }, + { + "epoch": 3.0083194675540765, + "loss": 0.9373518228530884, + "loss_ce": 9.595306619303301e-05, + "loss_iou": 0.298828125, + "loss_num": 0.06787109375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 56668452, + "step": 904 + }, + { + "epoch": 3.011647254575707, + "grad_norm": 157.36032104492188, + "learning_rate": 5e-06, + "loss": 0.6418, + "num_input_tokens_seen": 56730152, + "step": 905 + }, + { + "epoch": 3.011647254575707, + "loss": 0.5183830261230469, + "loss_ce": 0.002696982817724347, + "loss_iou": 0.0, + "loss_num": 0.10302734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 56730152, + "step": 905 + }, + { + "epoch": 3.0149750415973378, + "grad_norm": 36.10614013671875, + "learning_rate": 5e-06, + "loss": 0.7523, + "num_input_tokens_seen": 56792164, + "step": 906 + }, + { + "epoch": 3.0149750415973378, + "loss": 1.0320461988449097, + "loss_ce": 6.377464160323143e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0634765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 56792164, + "step": 906 + }, + { + "epoch": 3.0183028286189684, + "grad_norm": 6.202398777008057, + "learning_rate": 5e-06, + "loss": 0.6772, + "num_input_tokens_seen": 56852804, + "step": 907 + }, + { + "epoch": 3.0183028286189684, + "loss": 0.6682400107383728, + "loss_ce": 2.7143531042383984e-05, + "loss_iou": 0.224609375, + "loss_num": 0.043701171875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 56852804, + "step": 907 + }, + { + "epoch": 3.021630615640599, + "grad_norm": 15.27850341796875, + "learning_rate": 5e-06, + "loss": 0.8055, + "num_input_tokens_seen": 56915900, + "step": 908 + }, + { + "epoch": 3.021630615640599, + "loss": 0.9380273222923279, + "loss_ce": 3.903326069121249e-05, + "loss_iou": 0.341796875, + "loss_num": 0.05078125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 56915900, + "step": 908 + }, + { + "epoch": 3.0249584026622296, + "grad_norm": 24.6085147857666, + "learning_rate": 5e-06, + "loss": 1.1187, + "num_input_tokens_seen": 56978488, + "step": 909 + }, + { + "epoch": 3.0249584026622296, + "loss": 1.3314964771270752, + "loss_ce": 0.002883219625800848, + "loss_iou": 0.470703125, + "loss_num": 0.0771484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 56978488, + "step": 909 + }, + { + "epoch": 3.02828618968386, + "grad_norm": 21.691267013549805, + "learning_rate": 5e-06, + "loss": 0.7511, + "num_input_tokens_seen": 57042312, + "step": 910 + }, + { + "epoch": 3.02828618968386, + "loss": 0.7569243907928467, + "loss_ce": 0.0004546991258393973, + "loss_iou": 0.248046875, + "loss_num": 0.05224609375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 57042312, + "step": 910 + }, + { + "epoch": 3.031613976705491, + "grad_norm": 18.6723690032959, + "learning_rate": 5e-06, + "loss": 0.6642, + "num_input_tokens_seen": 57103640, + "step": 911 + }, + { + "epoch": 3.031613976705491, + "loss": 0.7280317544937134, + "loss_ce": 0.00024860326084308326, + "loss_iou": 0.216796875, + "loss_num": 0.05859375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 57103640, + "step": 911 + }, + { + "epoch": 3.0349417637271214, + "grad_norm": 16.631738662719727, + "learning_rate": 5e-06, + "loss": 0.6276, + "num_input_tokens_seen": 57165720, + "step": 912 + }, + { + "epoch": 3.0349417637271214, + "loss": 0.6402130126953125, + "loss_ce": 0.0010528563288971782, + "loss_iou": 0.21484375, + "loss_num": 0.041748046875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 57165720, + "step": 912 + }, + { + "epoch": 3.038269550748752, + "grad_norm": 11.869771957397461, + "learning_rate": 5e-06, + "loss": 0.8904, + "num_input_tokens_seen": 57228768, + "step": 913 + }, + { + "epoch": 3.038269550748752, + "loss": 0.7842072248458862, + "loss_ce": 2.7603602575254627e-05, + "loss_iou": 0.2041015625, + "loss_num": 0.07470703125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 57228768, + "step": 913 + }, + { + "epoch": 3.0415973377703827, + "grad_norm": 16.256175994873047, + "learning_rate": 5e-06, + "loss": 0.7901, + "num_input_tokens_seen": 57290928, + "step": 914 + }, + { + "epoch": 3.0415973377703827, + "loss": 0.9927595257759094, + "loss_ce": 8.371622243430465e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0634765625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 57290928, + "step": 914 + }, + { + "epoch": 3.0449251247920133, + "grad_norm": 24.751188278198242, + "learning_rate": 5e-06, + "loss": 0.6958, + "num_input_tokens_seen": 57353872, + "step": 915 + }, + { + "epoch": 3.0449251247920133, + "loss": 0.7315420508384705, + "loss_ce": 0.00424710288643837, + "loss_iou": 0.23046875, + "loss_num": 0.052734375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 57353872, + "step": 915 + }, + { + "epoch": 3.048252911813644, + "grad_norm": 16.81283187866211, + "learning_rate": 5e-06, + "loss": 1.0033, + "num_input_tokens_seen": 57418248, + "step": 916 + }, + { + "epoch": 3.048252911813644, + "loss": 0.8678412437438965, + "loss_ce": 0.0008979164995253086, + "loss_iou": 0.341796875, + "loss_num": 0.037109375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 57418248, + "step": 916 + }, + { + "epoch": 3.0515806988352745, + "grad_norm": 8.283062934875488, + "learning_rate": 5e-06, + "loss": 0.8209, + "num_input_tokens_seen": 57481256, + "step": 917 + }, + { + "epoch": 3.0515806988352745, + "loss": 0.8476381897926331, + "loss_ce": 0.00047019918565638363, + "loss_iou": 0.30078125, + "loss_num": 0.049560546875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 57481256, + "step": 917 + }, + { + "epoch": 3.054908485856905, + "grad_norm": 19.258445739746094, + "learning_rate": 5e-06, + "loss": 0.7102, + "num_input_tokens_seen": 57543436, + "step": 918 + }, + { + "epoch": 3.054908485856905, + "loss": 0.8542848825454712, + "loss_ce": 0.0001589024905115366, + "loss_iou": 0.2490234375, + "loss_num": 0.0712890625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 57543436, + "step": 918 + }, + { + "epoch": 3.0582362728785357, + "grad_norm": 24.21323013305664, + "learning_rate": 5e-06, + "loss": 0.7274, + "num_input_tokens_seen": 57606988, + "step": 919 + }, + { + "epoch": 3.0582362728785357, + "loss": 0.861693263053894, + "loss_ce": 0.0008534241351298988, + "loss_iou": 0.294921875, + "loss_num": 0.053955078125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 57606988, + "step": 919 + }, + { + "epoch": 3.0615640599001663, + "grad_norm": 28.660486221313477, + "learning_rate": 5e-06, + "loss": 0.5461, + "num_input_tokens_seen": 57668872, + "step": 920 + }, + { + "epoch": 3.0615640599001663, + "loss": 0.6151924133300781, + "loss_ce": 0.0004463810473680496, + "loss_iou": 0.2119140625, + "loss_num": 0.038330078125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 57668872, + "step": 920 + }, + { + "epoch": 3.064891846921797, + "grad_norm": 10.633936882019043, + "learning_rate": 5e-06, + "loss": 0.9587, + "num_input_tokens_seen": 57732472, + "step": 921 + }, + { + "epoch": 3.064891846921797, + "loss": 1.0549280643463135, + "loss_ce": 0.00048464565770700574, + "loss_iou": 0.369140625, + "loss_num": 0.06298828125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 57732472, + "step": 921 + }, + { + "epoch": 3.0682196339434276, + "grad_norm": 26.227346420288086, + "learning_rate": 5e-06, + "loss": 0.966, + "num_input_tokens_seen": 57796336, + "step": 922 + }, + { + "epoch": 3.0682196339434276, + "loss": 0.8780733346939087, + "loss_ce": 0.0028292066417634487, + "loss_iou": 0.251953125, + "loss_num": 0.07470703125, + "loss_xval": 0.875, + "num_input_tokens_seen": 57796336, + "step": 922 + }, + { + "epoch": 3.071547420965058, + "grad_norm": 20.741628646850586, + "learning_rate": 5e-06, + "loss": 0.8608, + "num_input_tokens_seen": 57861404, + "step": 923 + }, + { + "epoch": 3.071547420965058, + "loss": 0.9321870803833008, + "loss_ce": 0.00030230951961129904, + "loss_iou": 0.341796875, + "loss_num": 0.050048828125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 57861404, + "step": 923 + }, + { + "epoch": 3.074875207986689, + "grad_norm": 10.478264808654785, + "learning_rate": 5e-06, + "loss": 0.598, + "num_input_tokens_seen": 57923204, + "step": 924 + }, + { + "epoch": 3.074875207986689, + "loss": 0.46035149693489075, + "loss_ce": 8.537084795534611e-05, + "loss_iou": 0.09716796875, + "loss_num": 0.05322265625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 57923204, + "step": 924 + }, + { + "epoch": 3.0782029950083194, + "grad_norm": 15.388328552246094, + "learning_rate": 5e-06, + "loss": 1.0425, + "num_input_tokens_seen": 57985892, + "step": 925 + }, + { + "epoch": 3.0782029950083194, + "loss": 1.1015738248825073, + "loss_ce": 0.001964439172297716, + "loss_iou": 0.33203125, + "loss_num": 0.0869140625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 57985892, + "step": 925 + }, + { + "epoch": 3.08153078202995, + "grad_norm": 10.75679874420166, + "learning_rate": 5e-06, + "loss": 0.796, + "num_input_tokens_seen": 58049620, + "step": 926 + }, + { + "epoch": 3.08153078202995, + "loss": 0.7979081273078918, + "loss_ce": 5.657908695866354e-05, + "loss_iou": 0.2265625, + "loss_num": 0.06884765625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 58049620, + "step": 926 + }, + { + "epoch": 3.0848585690515806, + "grad_norm": 11.969529151916504, + "learning_rate": 5e-06, + "loss": 0.577, + "num_input_tokens_seen": 58111152, + "step": 927 + }, + { + "epoch": 3.0848585690515806, + "loss": 0.7319214344024658, + "loss_ce": 0.00010991955059580505, + "loss_iou": 0.248046875, + "loss_num": 0.04736328125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 58111152, + "step": 927 + }, + { + "epoch": 3.0881863560732112, + "grad_norm": 9.87148666381836, + "learning_rate": 5e-06, + "loss": 0.7442, + "num_input_tokens_seen": 58175032, + "step": 928 + }, + { + "epoch": 3.0881863560732112, + "loss": 0.6432963013648987, + "loss_ce": 0.0014505886938422918, + "loss_iou": 0.2216796875, + "loss_num": 0.03955078125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 58175032, + "step": 928 + }, + { + "epoch": 3.091514143094842, + "grad_norm": 52.29267501831055, + "learning_rate": 5e-06, + "loss": 0.9138, + "num_input_tokens_seen": 58237164, + "step": 929 + }, + { + "epoch": 3.091514143094842, + "loss": 0.9514411091804504, + "loss_ce": 0.0002692709385883063, + "loss_iou": 0.3359375, + "loss_num": 0.055419921875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 58237164, + "step": 929 + }, + { + "epoch": 3.0948419301164725, + "grad_norm": 12.875478744506836, + "learning_rate": 5e-06, + "loss": 0.9315, + "num_input_tokens_seen": 58300844, + "step": 930 + }, + { + "epoch": 3.0948419301164725, + "loss": 0.9565946459770203, + "loss_ce": 0.0007841241895221174, + "loss_iou": 0.33203125, + "loss_num": 0.058349609375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 58300844, + "step": 930 + }, + { + "epoch": 3.098169717138103, + "grad_norm": 22.145496368408203, + "learning_rate": 5e-06, + "loss": 0.7771, + "num_input_tokens_seen": 58365028, + "step": 931 + }, + { + "epoch": 3.098169717138103, + "loss": 0.6949498653411865, + "loss_ce": 0.00036975587136112154, + "loss_iou": 0.259765625, + "loss_num": 0.034912109375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 58365028, + "step": 931 + }, + { + "epoch": 3.1014975041597337, + "grad_norm": 16.330707550048828, + "learning_rate": 5e-06, + "loss": 0.7964, + "num_input_tokens_seen": 58426292, + "step": 932 + }, + { + "epoch": 3.1014975041597337, + "loss": 0.9439092874526978, + "loss_ce": 0.0003057535504922271, + "loss_iou": 0.25390625, + "loss_num": 0.0869140625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 58426292, + "step": 932 + }, + { + "epoch": 3.1048252911813643, + "grad_norm": 10.452964782714844, + "learning_rate": 5e-06, + "loss": 0.7588, + "num_input_tokens_seen": 58488240, + "step": 933 + }, + { + "epoch": 3.1048252911813643, + "loss": 0.8706070780754089, + "loss_ce": 0.001954704290255904, + "loss_iou": 0.296875, + "loss_num": 0.0546875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 58488240, + "step": 933 + }, + { + "epoch": 3.108153078202995, + "grad_norm": 18.522306442260742, + "learning_rate": 5e-06, + "loss": 0.9628, + "num_input_tokens_seen": 58551624, + "step": 934 + }, + { + "epoch": 3.108153078202995, + "loss": 0.8793235421180725, + "loss_ce": 0.0013938324991613626, + "loss_iou": 0.345703125, + "loss_num": 0.037841796875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 58551624, + "step": 934 + }, + { + "epoch": 3.1114808652246255, + "grad_norm": 7.724632740020752, + "learning_rate": 5e-06, + "loss": 0.778, + "num_input_tokens_seen": 58613036, + "step": 935 + }, + { + "epoch": 3.1114808652246255, + "loss": 1.0509037971496582, + "loss_ce": 0.007446764037013054, + "loss_iou": 0.26171875, + "loss_num": 0.10400390625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 58613036, + "step": 935 + }, + { + "epoch": 3.114808652246256, + "grad_norm": 12.31716537475586, + "learning_rate": 5e-06, + "loss": 0.9438, + "num_input_tokens_seen": 58675872, + "step": 936 + }, + { + "epoch": 3.114808652246256, + "loss": 0.9533208608627319, + "loss_ce": 0.0005620683077722788, + "loss_iou": 0.30859375, + "loss_num": 0.06689453125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 58675872, + "step": 936 + }, + { + "epoch": 3.1181364392678868, + "grad_norm": 14.161133766174316, + "learning_rate": 5e-06, + "loss": 0.8589, + "num_input_tokens_seen": 58738884, + "step": 937 + }, + { + "epoch": 3.1181364392678868, + "loss": 0.8286876678466797, + "loss_ce": 0.0001964616822078824, + "loss_iou": 0.310546875, + "loss_num": 0.041259765625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 58738884, + "step": 937 + }, + { + "epoch": 3.1214642262895174, + "grad_norm": 17.004718780517578, + "learning_rate": 5e-06, + "loss": 0.7602, + "num_input_tokens_seen": 58802072, + "step": 938 + }, + { + "epoch": 3.1214642262895174, + "loss": 0.821278989315033, + "loss_ce": 0.0004782435134984553, + "loss_iou": 0.298828125, + "loss_num": 0.044921875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 58802072, + "step": 938 + }, + { + "epoch": 3.124792013311148, + "grad_norm": 12.556039810180664, + "learning_rate": 5e-06, + "loss": 0.8359, + "num_input_tokens_seen": 58864708, + "step": 939 + }, + { + "epoch": 3.124792013311148, + "loss": 0.8781052827835083, + "loss_ce": 0.0005417764186859131, + "loss_iou": 0.298828125, + "loss_num": 0.055908203125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 58864708, + "step": 939 + }, + { + "epoch": 3.1281198003327786, + "grad_norm": 14.815278053283691, + "learning_rate": 5e-06, + "loss": 0.8864, + "num_input_tokens_seen": 58926916, + "step": 940 + }, + { + "epoch": 3.1281198003327786, + "loss": 0.9597880840301514, + "loss_ce": 0.0008037795196287334, + "loss_iou": 0.357421875, + "loss_num": 0.048828125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 58926916, + "step": 940 + }, + { + "epoch": 3.131447587354409, + "grad_norm": 17.76370620727539, + "learning_rate": 5e-06, + "loss": 0.84, + "num_input_tokens_seen": 58988668, + "step": 941 + }, + { + "epoch": 3.131447587354409, + "loss": 0.88735431432724, + "loss_ce": 0.00042194739216938615, + "loss_iou": 0.32421875, + "loss_num": 0.048095703125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 58988668, + "step": 941 + }, + { + "epoch": 3.13477537437604, + "grad_norm": 36.662315368652344, + "learning_rate": 5e-06, + "loss": 0.8738, + "num_input_tokens_seen": 59052520, + "step": 942 + }, + { + "epoch": 3.13477537437604, + "loss": 0.8464229106903076, + "loss_ce": 0.0009028796339407563, + "loss_iou": 0.322265625, + "loss_num": 0.0400390625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 59052520, + "step": 942 + }, + { + "epoch": 3.1381031613976704, + "grad_norm": 8.20991039276123, + "learning_rate": 5e-06, + "loss": 0.5881, + "num_input_tokens_seen": 59114480, + "step": 943 + }, + { + "epoch": 3.1381031613976704, + "loss": 0.5021640658378601, + "loss_ce": 8.887949661584571e-05, + "loss_iou": 0.1484375, + "loss_num": 0.041259765625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 59114480, + "step": 943 + }, + { + "epoch": 3.141430948419301, + "grad_norm": 18.209897994995117, + "learning_rate": 5e-06, + "loss": 1.0336, + "num_input_tokens_seen": 59175212, + "step": 944 + }, + { + "epoch": 3.141430948419301, + "loss": 1.2060582637786865, + "loss_ce": 0.002444992307573557, + "loss_iou": 0.40234375, + "loss_num": 0.080078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 59175212, + "step": 944 + }, + { + "epoch": 3.1447587354409317, + "grad_norm": 19.541431427001953, + "learning_rate": 5e-06, + "loss": 0.9663, + "num_input_tokens_seen": 59239460, + "step": 945 + }, + { + "epoch": 3.1447587354409317, + "loss": 0.7803661823272705, + "loss_ce": 9.27629807847552e-05, + "loss_iou": 0.265625, + "loss_num": 0.0498046875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 59239460, + "step": 945 + }, + { + "epoch": 3.1480865224625623, + "grad_norm": 10.669411659240723, + "learning_rate": 5e-06, + "loss": 1.0049, + "num_input_tokens_seen": 59301360, + "step": 946 + }, + { + "epoch": 3.1480865224625623, + "loss": 1.1545720100402832, + "loss_ce": 3.09227907564491e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0888671875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 59301360, + "step": 946 + }, + { + "epoch": 3.151414309484193, + "grad_norm": 12.653473854064941, + "learning_rate": 5e-06, + "loss": 0.7067, + "num_input_tokens_seen": 59365016, + "step": 947 + }, + { + "epoch": 3.151414309484193, + "loss": 0.7654366493225098, + "loss_ce": 0.0005440429667942226, + "loss_iou": 0.2734375, + "loss_num": 0.0439453125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 59365016, + "step": 947 + }, + { + "epoch": 3.1547420965058235, + "grad_norm": 11.480525016784668, + "learning_rate": 5e-06, + "loss": 0.6465, + "num_input_tokens_seen": 59427212, + "step": 948 + }, + { + "epoch": 3.1547420965058235, + "loss": 0.8721858263015747, + "loss_ce": 0.0003596378955990076, + "loss_iou": 0.326171875, + "loss_num": 0.04345703125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 59427212, + "step": 948 + }, + { + "epoch": 3.158069883527454, + "grad_norm": 34.165103912353516, + "learning_rate": 5e-06, + "loss": 0.9143, + "num_input_tokens_seen": 59491056, + "step": 949 + }, + { + "epoch": 3.158069883527454, + "loss": 0.9038797616958618, + "loss_ce": 7.110174919944257e-05, + "loss_iou": 0.30859375, + "loss_num": 0.057861328125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 59491056, + "step": 949 + }, + { + "epoch": 3.1613976705490847, + "grad_norm": 24.460308074951172, + "learning_rate": 5e-06, + "loss": 0.827, + "num_input_tokens_seen": 59552212, + "step": 950 + }, + { + "epoch": 3.1613976705490847, + "loss": 0.9148125052452087, + "loss_ce": 1.7638567442190833e-05, + "loss_iou": 0.291015625, + "loss_num": 0.06689453125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 59552212, + "step": 950 + }, + { + "epoch": 3.1647254575707153, + "grad_norm": 16.602678298950195, + "learning_rate": 5e-06, + "loss": 0.7661, + "num_input_tokens_seen": 59615180, + "step": 951 + }, + { + "epoch": 3.1647254575707153, + "loss": 0.9075579047203064, + "loss_ce": 0.0015520007582381368, + "loss_iou": 0.25, + "loss_num": 0.0810546875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 59615180, + "step": 951 + }, + { + "epoch": 3.168053244592346, + "grad_norm": 14.823358535766602, + "learning_rate": 5e-06, + "loss": 0.69, + "num_input_tokens_seen": 59678104, + "step": 952 + }, + { + "epoch": 3.168053244592346, + "loss": 0.7148585319519043, + "loss_ce": 1.4785388884774875e-05, + "loss_iou": 0.26171875, + "loss_num": 0.038330078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 59678104, + "step": 952 + }, + { + "epoch": 3.1713810316139766, + "grad_norm": 63.57862091064453, + "learning_rate": 5e-06, + "loss": 0.7865, + "num_input_tokens_seen": 59740976, + "step": 953 + }, + { + "epoch": 3.1713810316139766, + "loss": 0.5665551424026489, + "loss_ce": 0.001247492036782205, + "loss_iou": 0.1572265625, + "loss_num": 0.050048828125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 59740976, + "step": 953 + }, + { + "epoch": 3.174708818635607, + "grad_norm": 34.066715240478516, + "learning_rate": 5e-06, + "loss": 0.812, + "num_input_tokens_seen": 59803580, + "step": 954 + }, + { + "epoch": 3.174708818635607, + "loss": 0.7728263139724731, + "loss_ce": 0.0002432619803585112, + "loss_iou": 0.28515625, + "loss_num": 0.04052734375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 59803580, + "step": 954 + }, + { + "epoch": 3.178036605657238, + "grad_norm": 31.254560470581055, + "learning_rate": 5e-06, + "loss": 1.0825, + "num_input_tokens_seen": 59865328, + "step": 955 + }, + { + "epoch": 3.178036605657238, + "loss": 0.8610126972198486, + "loss_ce": 0.00017285677313338965, + "loss_iou": 0.25, + "loss_num": 0.072265625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 59865328, + "step": 955 + }, + { + "epoch": 3.1813643926788684, + "grad_norm": 10.921689987182617, + "learning_rate": 5e-06, + "loss": 1.0658, + "num_input_tokens_seen": 59927552, + "step": 956 + }, + { + "epoch": 3.1813643926788684, + "loss": 1.1395654678344727, + "loss_ce": 0.0006494110566563904, + "loss_iou": 0.40234375, + "loss_num": 0.06689453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 59927552, + "step": 956 + }, + { + "epoch": 3.184692179700499, + "grad_norm": 9.880757331848145, + "learning_rate": 5e-06, + "loss": 0.8318, + "num_input_tokens_seen": 59989664, + "step": 957 + }, + { + "epoch": 3.184692179700499, + "loss": 0.9110987186431885, + "loss_ce": 0.0009424776071682572, + "loss_iou": 0.3359375, + "loss_num": 0.047607421875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 59989664, + "step": 957 + }, + { + "epoch": 3.1880199667221296, + "grad_norm": 9.38766860961914, + "learning_rate": 5e-06, + "loss": 0.6643, + "num_input_tokens_seen": 60052276, + "step": 958 + }, + { + "epoch": 3.1880199667221296, + "loss": 0.7620002031326294, + "loss_ce": 3.7309764593373984e-05, + "loss_iou": 0.25390625, + "loss_num": 0.050537109375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 60052276, + "step": 958 + }, + { + "epoch": 3.1913477537437602, + "grad_norm": 16.43718719482422, + "learning_rate": 5e-06, + "loss": 0.9924, + "num_input_tokens_seen": 60116120, + "step": 959 + }, + { + "epoch": 3.1913477537437602, + "loss": 1.1171211004257202, + "loss_ce": 0.0013984288088977337, + "loss_iou": 0.375, + "loss_num": 0.0732421875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 60116120, + "step": 959 + }, + { + "epoch": 3.194675540765391, + "grad_norm": 12.237974166870117, + "learning_rate": 5e-06, + "loss": 0.8707, + "num_input_tokens_seen": 60179548, + "step": 960 + }, + { + "epoch": 3.194675540765391, + "loss": 0.8710145950317383, + "loss_ce": 0.0006532109691761434, + "loss_iou": 0.32421875, + "loss_num": 0.044921875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 60179548, + "step": 960 + }, + { + "epoch": 3.1980033277870215, + "grad_norm": 24.791534423828125, + "learning_rate": 5e-06, + "loss": 0.8278, + "num_input_tokens_seen": 60241708, + "step": 961 + }, + { + "epoch": 3.1980033277870215, + "loss": 0.7581548690795898, + "loss_ce": 0.0022954712621867657, + "loss_iou": 0.2431640625, + "loss_num": 0.053955078125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 60241708, + "step": 961 + }, + { + "epoch": 3.201331114808652, + "grad_norm": 28.264705657958984, + "learning_rate": 5e-06, + "loss": 0.473, + "num_input_tokens_seen": 60304508, + "step": 962 + }, + { + "epoch": 3.201331114808652, + "loss": 0.5443636178970337, + "loss_ce": 0.0008455467177554965, + "loss_iou": 0.197265625, + "loss_num": 0.02978515625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 60304508, + "step": 962 + }, + { + "epoch": 3.2046589018302827, + "grad_norm": 10.311064720153809, + "learning_rate": 5e-06, + "loss": 0.6768, + "num_input_tokens_seen": 60367092, + "step": 963 + }, + { + "epoch": 3.2046589018302827, + "loss": 0.8569867610931396, + "loss_ce": 5.31335172127001e-05, + "loss_iou": 0.287109375, + "loss_num": 0.056640625, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 60367092, + "step": 963 + }, + { + "epoch": 3.2079866888519133, + "grad_norm": 10.463589668273926, + "learning_rate": 5e-06, + "loss": 0.838, + "num_input_tokens_seen": 60429284, + "step": 964 + }, + { + "epoch": 3.2079866888519133, + "loss": 0.6484445333480835, + "loss_ce": 0.0007394892745651305, + "loss_iou": 0.2216796875, + "loss_num": 0.040771484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 60429284, + "step": 964 + }, + { + "epoch": 3.211314475873544, + "grad_norm": 7.585339546203613, + "learning_rate": 5e-06, + "loss": 0.5347, + "num_input_tokens_seen": 60491188, + "step": 965 + }, + { + "epoch": 3.211314475873544, + "loss": 0.5105918049812317, + "loss_ce": 0.0004599463427439332, + "loss_iou": 0.1669921875, + "loss_num": 0.03515625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 60491188, + "step": 965 + }, + { + "epoch": 3.2146422628951745, + "grad_norm": 9.942543029785156, + "learning_rate": 5e-06, + "loss": 0.62, + "num_input_tokens_seen": 60553992, + "step": 966 + }, + { + "epoch": 3.2146422628951745, + "loss": 0.46438688039779663, + "loss_ce": 0.00039763684617355466, + "loss_iou": 0.1435546875, + "loss_num": 0.035400390625, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 60553992, + "step": 966 + }, + { + "epoch": 3.217970049916805, + "grad_norm": 11.350386619567871, + "learning_rate": 5e-06, + "loss": 0.887, + "num_input_tokens_seen": 60615900, + "step": 967 + }, + { + "epoch": 3.217970049916805, + "loss": 0.9106756448745728, + "loss_ce": 3.116061998298392e-05, + "loss_iou": 0.34375, + "loss_num": 0.044921875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 60615900, + "step": 967 + }, + { + "epoch": 3.2212978369384357, + "grad_norm": 11.828593254089355, + "learning_rate": 5e-06, + "loss": 0.9105, + "num_input_tokens_seen": 60678052, + "step": 968 + }, + { + "epoch": 3.2212978369384357, + "loss": 0.7735366225242615, + "loss_ce": 9.917082934407517e-05, + "loss_iou": 0.2138671875, + "loss_num": 0.0693359375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 60678052, + "step": 968 + }, + { + "epoch": 3.2246256239600664, + "grad_norm": 6.765110969543457, + "learning_rate": 5e-06, + "loss": 0.7719, + "num_input_tokens_seen": 60740316, + "step": 969 + }, + { + "epoch": 3.2246256239600664, + "loss": 1.1397228240966797, + "loss_ce": 0.0015393083449453115, + "loss_iou": 0.361328125, + "loss_num": 0.08349609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 60740316, + "step": 969 + }, + { + "epoch": 3.227953410981697, + "grad_norm": 14.274063110351562, + "learning_rate": 5e-06, + "loss": 0.7065, + "num_input_tokens_seen": 60801452, + "step": 970 + }, + { + "epoch": 3.227953410981697, + "loss": 0.7572785019874573, + "loss_ce": 0.00019845366477966309, + "loss_iou": 0.244140625, + "loss_num": 0.053466796875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 60801452, + "step": 970 + }, + { + "epoch": 3.2312811980033276, + "grad_norm": 7.189883232116699, + "learning_rate": 5e-06, + "loss": 0.8128, + "num_input_tokens_seen": 60863756, + "step": 971 + }, + { + "epoch": 3.2312811980033276, + "loss": 0.9057375192642212, + "loss_ce": 0.00021990106324665248, + "loss_iou": 0.349609375, + "loss_num": 0.041259765625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 60863756, + "step": 971 + }, + { + "epoch": 3.234608985024958, + "grad_norm": 20.947919845581055, + "learning_rate": 5e-06, + "loss": 1.029, + "num_input_tokens_seen": 60926592, + "step": 972 + }, + { + "epoch": 3.234608985024958, + "loss": 0.9959653615951538, + "loss_ce": 0.0006040430162101984, + "loss_iou": 0.322265625, + "loss_num": 0.0703125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 60926592, + "step": 972 + }, + { + "epoch": 3.237936772046589, + "grad_norm": 14.039640426635742, + "learning_rate": 5e-06, + "loss": 0.8004, + "num_input_tokens_seen": 60990536, + "step": 973 + }, + { + "epoch": 3.237936772046589, + "loss": 0.8818901777267456, + "loss_ce": 5.422691538115032e-05, + "loss_iou": 0.330078125, + "loss_num": 0.04443359375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 60990536, + "step": 973 + }, + { + "epoch": 3.2412645590682194, + "grad_norm": 12.025979042053223, + "learning_rate": 5e-06, + "loss": 0.6277, + "num_input_tokens_seen": 61053216, + "step": 974 + }, + { + "epoch": 3.2412645590682194, + "loss": 0.6128069758415222, + "loss_ce": 0.0011126084718853235, + "loss_iou": 0.21875, + "loss_num": 0.03466796875, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 61053216, + "step": 974 + }, + { + "epoch": 3.24459234608985, + "grad_norm": 13.156522750854492, + "learning_rate": 5e-06, + "loss": 0.7027, + "num_input_tokens_seen": 61116740, + "step": 975 + }, + { + "epoch": 3.24459234608985, + "loss": 0.7115057706832886, + "loss_ce": 0.0006903592147864401, + "loss_iou": 0.24609375, + "loss_num": 0.0439453125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 61116740, + "step": 975 + }, + { + "epoch": 3.2479201331114806, + "grad_norm": 7.883967876434326, + "learning_rate": 5e-06, + "loss": 0.5578, + "num_input_tokens_seen": 61178912, + "step": 976 + }, + { + "epoch": 3.2479201331114806, + "loss": 0.7233986258506775, + "loss_ce": 0.0003761877305805683, + "loss_iou": 0.25390625, + "loss_num": 0.043212890625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 61178912, + "step": 976 + }, + { + "epoch": 3.2512479201331113, + "grad_norm": 18.929187774658203, + "learning_rate": 5e-06, + "loss": 0.6357, + "num_input_tokens_seen": 61240704, + "step": 977 + }, + { + "epoch": 3.2512479201331113, + "loss": 0.6345537900924683, + "loss_ce": 3.23405911331065e-05, + "loss_iou": 0.181640625, + "loss_num": 0.054443359375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 61240704, + "step": 977 + }, + { + "epoch": 3.254575707154742, + "grad_norm": 11.569648742675781, + "learning_rate": 5e-06, + "loss": 0.9763, + "num_input_tokens_seen": 61303340, + "step": 978 + }, + { + "epoch": 3.254575707154742, + "loss": 0.850032389163971, + "loss_ce": 0.00042301719076931477, + "loss_iou": 0.21484375, + "loss_num": 0.083984375, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 61303340, + "step": 978 + }, + { + "epoch": 3.2579034941763725, + "grad_norm": 18.81865692138672, + "learning_rate": 5e-06, + "loss": 0.7637, + "num_input_tokens_seen": 61366660, + "step": 979 + }, + { + "epoch": 3.2579034941763725, + "loss": 0.6620635986328125, + "loss_ce": 0.0005646056379191577, + "loss_iou": 0.228515625, + "loss_num": 0.041015625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 61366660, + "step": 979 + }, + { + "epoch": 3.261231281198003, + "grad_norm": 21.63203239440918, + "learning_rate": 5e-06, + "loss": 0.6904, + "num_input_tokens_seen": 61429692, + "step": 980 + }, + { + "epoch": 3.261231281198003, + "loss": 0.768730103969574, + "loss_ce": 0.000541669491212815, + "loss_iou": 0.2578125, + "loss_num": 0.051025390625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 61429692, + "step": 980 + }, + { + "epoch": 3.2645590682196337, + "grad_norm": 26.75554656982422, + "learning_rate": 5e-06, + "loss": 0.9439, + "num_input_tokens_seen": 61493484, + "step": 981 + }, + { + "epoch": 3.2645590682196337, + "loss": 0.9156728982925415, + "loss_ce": 2.3398324628942646e-05, + "loss_iou": 0.33203125, + "loss_num": 0.050537109375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 61493484, + "step": 981 + }, + { + "epoch": 3.2678868552412643, + "grad_norm": 34.384639739990234, + "learning_rate": 5e-06, + "loss": 1.0029, + "num_input_tokens_seen": 61556876, + "step": 982 + }, + { + "epoch": 3.2678868552412643, + "loss": 1.1158475875854492, + "loss_ce": 0.00110146077349782, + "loss_iou": 0.34765625, + "loss_num": 0.08349609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 61556876, + "step": 982 + }, + { + "epoch": 3.271214642262895, + "grad_norm": 19.943857192993164, + "learning_rate": 5e-06, + "loss": 0.9756, + "num_input_tokens_seen": 61620480, + "step": 983 + }, + { + "epoch": 3.271214642262895, + "loss": 1.0187878608703613, + "loss_ce": 0.0007214570650830865, + "loss_iou": 0.392578125, + "loss_num": 0.04638671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 61620480, + "step": 983 + }, + { + "epoch": 3.2745424292845255, + "grad_norm": 17.966175079345703, + "learning_rate": 5e-06, + "loss": 0.8658, + "num_input_tokens_seen": 61684128, + "step": 984 + }, + { + "epoch": 3.2745424292845255, + "loss": 0.9176982641220093, + "loss_ce": 0.0013164564734324813, + "loss_iou": 0.291015625, + "loss_num": 0.06689453125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 61684128, + "step": 984 + }, + { + "epoch": 3.277870216306156, + "grad_norm": 20.2591552734375, + "learning_rate": 5e-06, + "loss": 0.9037, + "num_input_tokens_seen": 61747004, + "step": 985 + }, + { + "epoch": 3.277870216306156, + "loss": 1.0413687229156494, + "loss_ce": 0.0015739183872938156, + "loss_iou": 0.41015625, + "loss_num": 0.04345703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 61747004, + "step": 985 + }, + { + "epoch": 3.2811980033277868, + "grad_norm": 17.5063419342041, + "learning_rate": 5e-06, + "loss": 0.5624, + "num_input_tokens_seen": 61807736, + "step": 986 + }, + { + "epoch": 3.2811980033277868, + "loss": 0.4131319522857666, + "loss_ce": 4.6039051085244864e-05, + "loss_iou": 0.10400390625, + "loss_num": 0.041015625, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 61807736, + "step": 986 + }, + { + "epoch": 3.284525790349418, + "grad_norm": 21.377288818359375, + "learning_rate": 5e-06, + "loss": 0.8481, + "num_input_tokens_seen": 61870936, + "step": 987 + }, + { + "epoch": 3.284525790349418, + "loss": 0.8774704337120056, + "loss_ce": 2.90050265903119e-05, + "loss_iou": 0.33203125, + "loss_num": 0.042724609375, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 61870936, + "step": 987 + }, + { + "epoch": 3.2878535773710484, + "grad_norm": 21.52035903930664, + "learning_rate": 5e-06, + "loss": 0.5701, + "num_input_tokens_seen": 61932556, + "step": 988 + }, + { + "epoch": 3.2878535773710484, + "loss": 0.8120724558830261, + "loss_ce": 0.00030482906731776893, + "loss_iou": 0.27734375, + "loss_num": 0.051025390625, + "loss_xval": 0.8125, + "num_input_tokens_seen": 61932556, + "step": 988 + }, + { + "epoch": 3.291181364392679, + "grad_norm": 18.4642276763916, + "learning_rate": 5e-06, + "loss": 0.9416, + "num_input_tokens_seen": 61996240, + "step": 989 + }, + { + "epoch": 3.291181364392679, + "loss": 0.9976834058761597, + "loss_ce": 0.00036900522536598146, + "loss_iou": 0.375, + "loss_num": 0.049560546875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 61996240, + "step": 989 + }, + { + "epoch": 3.2945091514143097, + "grad_norm": 12.169829368591309, + "learning_rate": 5e-06, + "loss": 0.9705, + "num_input_tokens_seen": 62058608, + "step": 990 + }, + { + "epoch": 3.2945091514143097, + "loss": 1.1562477350234985, + "loss_ce": 0.0007300969446077943, + "loss_iou": 0.396484375, + "loss_num": 0.07275390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 62058608, + "step": 990 + }, + { + "epoch": 3.2978369384359403, + "grad_norm": 18.226551055908203, + "learning_rate": 5e-06, + "loss": 0.8279, + "num_input_tokens_seen": 62121952, + "step": 991 + }, + { + "epoch": 3.2978369384359403, + "loss": 0.7579995393753052, + "loss_ce": 6.495913839899004e-05, + "loss_iou": 0.26953125, + "loss_num": 0.04345703125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 62121952, + "step": 991 + }, + { + "epoch": 3.301164725457571, + "grad_norm": 5.881676197052002, + "learning_rate": 5e-06, + "loss": 0.4544, + "num_input_tokens_seen": 62182960, + "step": 992 + }, + { + "epoch": 3.301164725457571, + "loss": 0.5944963693618774, + "loss_ce": 1.391188743582461e-05, + "loss_iou": 0.2158203125, + "loss_num": 0.03271484375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 62182960, + "step": 992 + }, + { + "epoch": 3.3044925124792015, + "grad_norm": 23.40874671936035, + "learning_rate": 5e-06, + "loss": 1.0811, + "num_input_tokens_seen": 62245376, + "step": 993 + }, + { + "epoch": 3.3044925124792015, + "loss": 1.324781894683838, + "loss_ce": 0.0012955316342413425, + "loss_iou": 0.44140625, + "loss_num": 0.08740234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 62245376, + "step": 993 + }, + { + "epoch": 3.307820299500832, + "grad_norm": 16.891769409179688, + "learning_rate": 5e-06, + "loss": 0.8996, + "num_input_tokens_seen": 62308924, + "step": 994 + }, + { + "epoch": 3.307820299500832, + "loss": 1.0149037837982178, + "loss_ce": 0.001720264321193099, + "loss_iou": 0.380859375, + "loss_num": 0.050048828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 62308924, + "step": 994 + }, + { + "epoch": 3.3111480865224627, + "grad_norm": 8.293322563171387, + "learning_rate": 5e-06, + "loss": 0.8275, + "num_input_tokens_seen": 62368672, + "step": 995 + }, + { + "epoch": 3.3111480865224627, + "loss": 0.8809036016464233, + "loss_ce": 4.425490624271333e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.08056640625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 62368672, + "step": 995 + }, + { + "epoch": 3.3144758735440933, + "grad_norm": 16.752803802490234, + "learning_rate": 5e-06, + "loss": 0.7262, + "num_input_tokens_seen": 62431712, + "step": 996 + }, + { + "epoch": 3.3144758735440933, + "loss": 0.7768771052360535, + "loss_ce": 2.1639707483700477e-05, + "loss_iou": 0.25390625, + "loss_num": 0.053955078125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 62431712, + "step": 996 + }, + { + "epoch": 3.317803660565724, + "grad_norm": 15.595096588134766, + "learning_rate": 5e-06, + "loss": 0.7665, + "num_input_tokens_seen": 62493536, + "step": 997 + }, + { + "epoch": 3.317803660565724, + "loss": 0.8226642608642578, + "loss_ce": 0.00015452262596227229, + "loss_iou": 0.27734375, + "loss_num": 0.053466796875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 62493536, + "step": 997 + }, + { + "epoch": 3.3211314475873546, + "grad_norm": 24.1756649017334, + "learning_rate": 5e-06, + "loss": 0.785, + "num_input_tokens_seen": 62557440, + "step": 998 + }, + { + "epoch": 3.3211314475873546, + "loss": 0.7211722731590271, + "loss_ce": 0.0009574384312145412, + "loss_iou": 0.27734375, + "loss_num": 0.033447265625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 62557440, + "step": 998 + }, + { + "epoch": 3.324459234608985, + "grad_norm": 8.496849060058594, + "learning_rate": 5e-06, + "loss": 0.8508, + "num_input_tokens_seen": 62620908, + "step": 999 + }, + { + "epoch": 3.324459234608985, + "loss": 0.9880321025848389, + "loss_ce": 0.0017039903905242682, + "loss_iou": 0.337890625, + "loss_num": 0.061767578125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 62620908, + "step": 999 + }, + { + "epoch": 3.327787021630616, + "grad_norm": 13.95791244506836, + "learning_rate": 5e-06, + "loss": 0.8183, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "eval_seeclick_CIoU": 0.11919866502285004, + "eval_seeclick_GIoU": 0.13886269554495811, + "eval_seeclick_IoU": 0.22386732697486877, + "eval_seeclick_MAE_all": 0.18997914344072342, + "eval_seeclick_MAE_h": 0.04969533532857895, + "eval_seeclick_MAE_w": 0.14520900696516037, + "eval_seeclick_MAE_x_boxes": 0.2827809154987335, + "eval_seeclick_MAE_y_boxes": 0.14949627220630646, + "eval_seeclick_NUM_probability": 0.9996586740016937, + "eval_seeclick_inside_bbox": 0.32083334028720856, + "eval_seeclick_loss": 2.743774175643921, + "eval_seeclick_loss_ce": 0.07989468798041344, + "eval_seeclick_loss_iou": 0.8560791015625, + "eval_seeclick_loss_num": 0.1868896484375, + "eval_seeclick_loss_xval": 2.6455078125, + "eval_seeclick_runtime": 71.1388, + "eval_seeclick_samples_per_second": 0.661, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "eval_icons_CIoU": 0.05206053704023361, + "eval_icons_GIoU": 0.16786546260118484, + "eval_icons_IoU": 0.20200002193450928, + "eval_icons_MAE_all": 0.14681052416563034, + "eval_icons_MAE_h": 0.08216147124767303, + "eval_icons_MAE_w": 0.1477516144514084, + "eval_icons_MAE_x_boxes": 0.1450214460492134, + "eval_icons_MAE_y_boxes": 0.04793294984847307, + "eval_icons_NUM_probability": 0.999993085861206, + "eval_icons_inside_bbox": 0.3888888955116272, + "eval_icons_loss": 2.3937511444091797, + "eval_icons_loss_ce": 1.3247173171748727e-06, + "eval_icons_loss_iou": 0.817138671875, + "eval_icons_loss_num": 0.1392059326171875, + "eval_icons_loss_xval": 2.33203125, + "eval_icons_runtime": 73.4327, + "eval_icons_samples_per_second": 0.681, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "eval_screenspot_CIoU": 0.05590761390825113, + "eval_screenspot_GIoU": 0.1182707001765569, + "eval_screenspot_IoU": 0.20136764148871103, + "eval_screenspot_MAE_all": 0.19364242255687714, + "eval_screenspot_MAE_h": 0.06537577758232753, + "eval_screenspot_MAE_w": 0.1785052220026652, + "eval_screenspot_MAE_x_boxes": 0.2493977944056193, + "eval_screenspot_MAE_y_boxes": 0.13203182319800058, + "eval_screenspot_NUM_probability": 0.9998934666315714, + "eval_screenspot_inside_bbox": 0.3787499964237213, + "eval_screenspot_loss": 2.7400693893432617, + "eval_screenspot_loss_ce": 0.00023837910460618636, + "eval_screenspot_loss_iou": 0.8929036458333334, + "eval_screenspot_loss_num": 0.197662353515625, + "eval_screenspot_loss_xval": 2.7724609375, + "eval_screenspot_runtime": 118.7252, + "eval_screenspot_samples_per_second": 0.75, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "eval_compot_CIoU": -0.032527330331504345, + "eval_compot_GIoU": 0.07551112771034241, + "eval_compot_IoU": 0.1327061653137207, + "eval_compot_MAE_all": 0.22042576223611832, + "eval_compot_MAE_h": 0.06776861473917961, + "eval_compot_MAE_w": 0.24602457880973816, + "eval_compot_MAE_x_boxes": 0.19308777898550034, + "eval_compot_MAE_y_boxes": 0.1478528529405594, + "eval_compot_NUM_probability": 0.9999693036079407, + "eval_compot_inside_bbox": 0.2395833358168602, + "eval_compot_loss": 2.906449556350708, + "eval_compot_loss_ce": 0.005752389319241047, + "eval_compot_loss_iou": 0.91650390625, + "eval_compot_loss_num": 0.211029052734375, + "eval_compot_loss_xval": 2.8876953125, + "eval_compot_runtime": 67.807, + "eval_compot_samples_per_second": 0.737, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "eval_custom_ui_MAE_all": 0.08277507498860359, + "eval_custom_ui_MAE_x": 0.07837143167853355, + "eval_custom_ui_MAE_y": 0.08717871829867363, + "eval_custom_ui_NUM_probability": 0.9999922215938568, + "eval_custom_ui_loss": 0.4004429578781128, + "eval_custom_ui_loss_ce": 4.6469912376778666e-05, + "eval_custom_ui_loss_num": 0.0777587890625, + "eval_custom_ui_loss_xval": 0.3887939453125, + "eval_custom_ui_runtime": 50.8348, + "eval_custom_ui_samples_per_second": 0.984, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.327787021630616, + "loss": 0.3899743854999542, + "loss_ce": 8.180466102203354e-05, + "loss_iou": 0.0, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 62684036, + "step": 1000 + }, + { + "epoch": 3.3311148086522464, + "grad_norm": 11.472808837890625, + "learning_rate": 5e-06, + "loss": 0.683, + "num_input_tokens_seen": 62745984, + "step": 1001 + }, + { + "epoch": 3.3311148086522464, + "loss": 0.5718774199485779, + "loss_ce": 8.515124136465602e-06, + "loss_iou": 0.12451171875, + "loss_num": 0.064453125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 62745984, + "step": 1001 + }, + { + "epoch": 3.334442595673877, + "grad_norm": 24.495885848999023, + "learning_rate": 5e-06, + "loss": 0.8243, + "num_input_tokens_seen": 62810288, + "step": 1002 + }, + { + "epoch": 3.334442595673877, + "loss": 0.7644026279449463, + "loss_ce": 0.00012040699948556721, + "loss_iou": 0.240234375, + "loss_num": 0.056640625, + "loss_xval": 0.765625, + "num_input_tokens_seen": 62810288, + "step": 1002 + }, + { + "epoch": 3.3377703826955076, + "grad_norm": 31.466510772705078, + "learning_rate": 5e-06, + "loss": 0.9726, + "num_input_tokens_seen": 62873968, + "step": 1003 + }, + { + "epoch": 3.3377703826955076, + "loss": 0.8061179518699646, + "loss_ce": 0.00045388404396362603, + "loss_iou": 0.271484375, + "loss_num": 0.05224609375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 62873968, + "step": 1003 + }, + { + "epoch": 3.3410981697171382, + "grad_norm": 17.76407241821289, + "learning_rate": 5e-06, + "loss": 0.83, + "num_input_tokens_seen": 62935872, + "step": 1004 + }, + { + "epoch": 3.3410981697171382, + "loss": 0.5958296656608582, + "loss_ce": 0.001591385342180729, + "loss_iou": 0.1513671875, + "loss_num": 0.058349609375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 62935872, + "step": 1004 + }, + { + "epoch": 3.344425956738769, + "grad_norm": 19.567127227783203, + "learning_rate": 5e-06, + "loss": 0.8515, + "num_input_tokens_seen": 62996516, + "step": 1005 + }, + { + "epoch": 3.344425956738769, + "loss": 0.9327386617660522, + "loss_ce": 0.00036568206269294024, + "loss_iou": 0.31640625, + "loss_num": 0.060302734375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 62996516, + "step": 1005 + }, + { + "epoch": 3.3477537437603995, + "grad_norm": 28.632431030273438, + "learning_rate": 5e-06, + "loss": 0.9322, + "num_input_tokens_seen": 63059556, + "step": 1006 + }, + { + "epoch": 3.3477537437603995, + "loss": 0.8702507615089417, + "loss_ce": 0.000133581503177993, + "loss_iou": 0.2890625, + "loss_num": 0.05859375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 63059556, + "step": 1006 + }, + { + "epoch": 3.35108153078203, + "grad_norm": 13.562311172485352, + "learning_rate": 5e-06, + "loss": 0.6644, + "num_input_tokens_seen": 63122292, + "step": 1007 + }, + { + "epoch": 3.35108153078203, + "loss": 0.6380650401115417, + "loss_ce": 0.00018663291120901704, + "loss_iou": 0.1826171875, + "loss_num": 0.054443359375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 63122292, + "step": 1007 + }, + { + "epoch": 3.3544093178036607, + "grad_norm": 12.887714385986328, + "learning_rate": 5e-06, + "loss": 0.5642, + "num_input_tokens_seen": 63185440, + "step": 1008 + }, + { + "epoch": 3.3544093178036607, + "loss": 0.5903370380401611, + "loss_ce": 0.0017139973351731896, + "loss_iou": 0.2109375, + "loss_num": 0.033447265625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 63185440, + "step": 1008 + }, + { + "epoch": 3.3577371048252913, + "grad_norm": 23.336519241333008, + "learning_rate": 5e-06, + "loss": 0.8652, + "num_input_tokens_seen": 63248932, + "step": 1009 + }, + { + "epoch": 3.3577371048252913, + "loss": 0.8400235176086426, + "loss_ce": 0.00042392255272716284, + "loss_iou": 0.265625, + "loss_num": 0.0615234375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 63248932, + "step": 1009 + }, + { + "epoch": 3.361064891846922, + "grad_norm": 9.43217945098877, + "learning_rate": 5e-06, + "loss": 0.7828, + "num_input_tokens_seen": 63311068, + "step": 1010 + }, + { + "epoch": 3.361064891846922, + "loss": 0.7335468530654907, + "loss_ce": 0.00039258040487766266, + "loss_iou": 0.25390625, + "loss_num": 0.04541015625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 63311068, + "step": 1010 + }, + { + "epoch": 3.3643926788685525, + "grad_norm": 14.894099235534668, + "learning_rate": 5e-06, + "loss": 0.8479, + "num_input_tokens_seen": 63373624, + "step": 1011 + }, + { + "epoch": 3.3643926788685525, + "loss": 0.9643365144729614, + "loss_ce": 0.0007134462357498705, + "loss_iou": 0.380859375, + "loss_num": 0.0400390625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 63373624, + "step": 1011 + }, + { + "epoch": 3.367720465890183, + "grad_norm": 21.609052658081055, + "learning_rate": 5e-06, + "loss": 0.6569, + "num_input_tokens_seen": 63435304, + "step": 1012 + }, + { + "epoch": 3.367720465890183, + "loss": 0.5942494869232178, + "loss_ce": 1.1208974683540873e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0458984375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 63435304, + "step": 1012 + }, + { + "epoch": 3.3710482529118138, + "grad_norm": 17.91049575805664, + "learning_rate": 5e-06, + "loss": 0.8687, + "num_input_tokens_seen": 63496096, + "step": 1013 + }, + { + "epoch": 3.3710482529118138, + "loss": 0.6522290706634521, + "loss_ce": 0.00025154344621114433, + "loss_iou": 0.16015625, + "loss_num": 0.06591796875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 63496096, + "step": 1013 + }, + { + "epoch": 3.3743760399334444, + "grad_norm": 4.573009490966797, + "learning_rate": 5e-06, + "loss": 0.5975, + "num_input_tokens_seen": 63557060, + "step": 1014 + }, + { + "epoch": 3.3743760399334444, + "loss": 0.42771950364112854, + "loss_ce": 0.0002292850404046476, + "loss_iou": 0.01611328125, + "loss_num": 0.0791015625, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 63557060, + "step": 1014 + }, + { + "epoch": 3.377703826955075, + "grad_norm": 13.058093070983887, + "learning_rate": 5e-06, + "loss": 0.6873, + "num_input_tokens_seen": 63619988, + "step": 1015 + }, + { + "epoch": 3.377703826955075, + "loss": 0.540930986404419, + "loss_ce": 3.743675188161433e-05, + "loss_iou": 0.16796875, + "loss_num": 0.041015625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 63619988, + "step": 1015 + }, + { + "epoch": 3.3810316139767056, + "grad_norm": 10.773247718811035, + "learning_rate": 5e-06, + "loss": 0.9057, + "num_input_tokens_seen": 63683036, + "step": 1016 + }, + { + "epoch": 3.3810316139767056, + "loss": 0.6826450228691101, + "loss_ce": 2.7821590265375562e-05, + "loss_iou": 0.25390625, + "loss_num": 0.035400390625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 63683036, + "step": 1016 + }, + { + "epoch": 3.384359400998336, + "grad_norm": 11.654807090759277, + "learning_rate": 5e-06, + "loss": 0.6626, + "num_input_tokens_seen": 63745488, + "step": 1017 + }, + { + "epoch": 3.384359400998336, + "loss": 0.5884538888931274, + "loss_ce": 7.501640357077122e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.061279296875, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 63745488, + "step": 1017 + }, + { + "epoch": 3.387687188019967, + "grad_norm": 32.220428466796875, + "learning_rate": 5e-06, + "loss": 0.7153, + "num_input_tokens_seen": 63808984, + "step": 1018 + }, + { + "epoch": 3.387687188019967, + "loss": 0.7895395755767822, + "loss_ce": 0.0002329161943634972, + "loss_iou": 0.279296875, + "loss_num": 0.0458984375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 63808984, + "step": 1018 + }, + { + "epoch": 3.3910149750415974, + "grad_norm": 9.35084056854248, + "learning_rate": 5e-06, + "loss": 0.7474, + "num_input_tokens_seen": 63869352, + "step": 1019 + }, + { + "epoch": 3.3910149750415974, + "loss": 0.9051174521446228, + "loss_ce": 8.812104351818562e-05, + "loss_iou": 0.30078125, + "loss_num": 0.06005859375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 63869352, + "step": 1019 + }, + { + "epoch": 3.394342762063228, + "grad_norm": 21.432973861694336, + "learning_rate": 5e-06, + "loss": 0.7779, + "num_input_tokens_seen": 63931168, + "step": 1020 + }, + { + "epoch": 3.394342762063228, + "loss": 0.8875124454498291, + "loss_ce": 0.00018336158245801926, + "loss_iou": 0.328125, + "loss_num": 0.046142578125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 63931168, + "step": 1020 + }, + { + "epoch": 3.3976705490848587, + "grad_norm": 16.27943992614746, + "learning_rate": 5e-06, + "loss": 0.8919, + "num_input_tokens_seen": 63994276, + "step": 1021 + }, + { + "epoch": 3.3976705490848587, + "loss": 0.7104352116584778, + "loss_ce": 0.0004742466553580016, + "loss_iou": 0.2734375, + "loss_num": 0.032958984375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 63994276, + "step": 1021 + }, + { + "epoch": 3.4009983361064893, + "grad_norm": 18.430397033691406, + "learning_rate": 5e-06, + "loss": 0.8877, + "num_input_tokens_seen": 64056064, + "step": 1022 + }, + { + "epoch": 3.4009983361064893, + "loss": 0.7179223299026489, + "loss_ce": 0.0005151316290721297, + "loss_iou": 0.1982421875, + "loss_num": 0.06396484375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 64056064, + "step": 1022 + }, + { + "epoch": 3.40432612312812, + "grad_norm": 10.066780090332031, + "learning_rate": 5e-06, + "loss": 1.0067, + "num_input_tokens_seen": 64119668, + "step": 1023 + }, + { + "epoch": 3.40432612312812, + "loss": 0.8341488838195801, + "loss_ce": 4.246793105266988e-05, + "loss_iou": 0.296875, + "loss_num": 0.048095703125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 64119668, + "step": 1023 + }, + { + "epoch": 3.4076539101497505, + "grad_norm": 7.158484935760498, + "learning_rate": 5e-06, + "loss": 0.9016, + "num_input_tokens_seen": 64183132, + "step": 1024 + }, + { + "epoch": 3.4076539101497505, + "loss": 0.7723921537399292, + "loss_ce": 0.0006635930621996522, + "loss_iou": 0.27734375, + "loss_num": 0.043701171875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 64183132, + "step": 1024 + }, + { + "epoch": 3.410981697171381, + "grad_norm": 23.170434951782227, + "learning_rate": 5e-06, + "loss": 0.985, + "num_input_tokens_seen": 64244404, + "step": 1025 + }, + { + "epoch": 3.410981697171381, + "loss": 0.9742788076400757, + "loss_ce": 3.5607161407824606e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0712890625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 64244404, + "step": 1025 + }, + { + "epoch": 3.4143094841930117, + "grad_norm": 15.608352661132812, + "learning_rate": 5e-06, + "loss": 0.764, + "num_input_tokens_seen": 64307508, + "step": 1026 + }, + { + "epoch": 3.4143094841930117, + "loss": 0.7705492973327637, + "loss_ce": 0.00016356556443497539, + "loss_iou": 0.291015625, + "loss_num": 0.037353515625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 64307508, + "step": 1026 + }, + { + "epoch": 3.4176372712146423, + "grad_norm": 22.033626556396484, + "learning_rate": 5e-06, + "loss": 0.724, + "num_input_tokens_seen": 64369472, + "step": 1027 + }, + { + "epoch": 3.4176372712146423, + "loss": 0.7757673859596252, + "loss_ce": 0.0007429937249980867, + "loss_iou": 0.244140625, + "loss_num": 0.057373046875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 64369472, + "step": 1027 + }, + { + "epoch": 3.420965058236273, + "grad_norm": 33.458152770996094, + "learning_rate": 5e-06, + "loss": 0.9622, + "num_input_tokens_seen": 64431116, + "step": 1028 + }, + { + "epoch": 3.420965058236273, + "loss": 1.2742621898651123, + "loss_ce": 0.0007026524399407208, + "loss_iou": 0.41015625, + "loss_num": 0.09033203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 64431116, + "step": 1028 + }, + { + "epoch": 3.4242928452579036, + "grad_norm": 10.938355445861816, + "learning_rate": 5e-06, + "loss": 0.9922, + "num_input_tokens_seen": 64491820, + "step": 1029 + }, + { + "epoch": 3.4242928452579036, + "loss": 1.0337855815887451, + "loss_ce": 0.00033826506114564836, + "loss_iou": 0.291015625, + "loss_num": 0.08984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 64491820, + "step": 1029 + }, + { + "epoch": 3.427620632279534, + "grad_norm": 14.789898872375488, + "learning_rate": 5e-06, + "loss": 0.8743, + "num_input_tokens_seen": 64555604, + "step": 1030 + }, + { + "epoch": 3.427620632279534, + "loss": 0.8684331178665161, + "loss_ce": 2.4945697077782825e-05, + "loss_iou": 0.337890625, + "loss_num": 0.038818359375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 64555604, + "step": 1030 + }, + { + "epoch": 3.430948419301165, + "grad_norm": 13.57603645324707, + "learning_rate": 5e-06, + "loss": 0.7378, + "num_input_tokens_seen": 64617848, + "step": 1031 + }, + { + "epoch": 3.430948419301165, + "loss": 0.9734039902687073, + "loss_ce": 0.0007477752515114844, + "loss_iou": 0.31640625, + "loss_num": 0.06787109375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 64617848, + "step": 1031 + }, + { + "epoch": 3.4342762063227954, + "grad_norm": 7.267496109008789, + "learning_rate": 5e-06, + "loss": 0.8297, + "num_input_tokens_seen": 64678864, + "step": 1032 + }, + { + "epoch": 3.4342762063227954, + "loss": 0.744103193283081, + "loss_ce": 0.0006950664101168513, + "loss_iou": 0.173828125, + "loss_num": 0.0791015625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 64678864, + "step": 1032 + }, + { + "epoch": 3.437603993344426, + "grad_norm": 9.682428359985352, + "learning_rate": 5e-06, + "loss": 0.9113, + "num_input_tokens_seen": 64741056, + "step": 1033 + }, + { + "epoch": 3.437603993344426, + "loss": 0.7183736562728882, + "loss_ce": 0.0008443902479484677, + "loss_iou": 0.22265625, + "loss_num": 0.0546875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 64741056, + "step": 1033 + }, + { + "epoch": 3.4409317803660566, + "grad_norm": 11.420270919799805, + "learning_rate": 5e-06, + "loss": 0.8273, + "num_input_tokens_seen": 64803852, + "step": 1034 + }, + { + "epoch": 3.4409317803660566, + "loss": 0.8885650038719177, + "loss_ce": 0.002090380061417818, + "loss_iou": 0.283203125, + "loss_num": 0.064453125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 64803852, + "step": 1034 + }, + { + "epoch": 3.4442595673876872, + "grad_norm": 10.965718269348145, + "learning_rate": 5e-06, + "loss": 0.9267, + "num_input_tokens_seen": 64865460, + "step": 1035 + }, + { + "epoch": 3.4442595673876872, + "loss": 0.7865220308303833, + "loss_ce": 2.2979209461482242e-05, + "loss_iou": 0.255859375, + "loss_num": 0.054931640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 64865460, + "step": 1035 + }, + { + "epoch": 3.447587354409318, + "grad_norm": 12.703821182250977, + "learning_rate": 5e-06, + "loss": 0.8564, + "num_input_tokens_seen": 64929860, + "step": 1036 + }, + { + "epoch": 3.447587354409318, + "loss": 0.8818983435630798, + "loss_ce": 0.0012830996420234442, + "loss_iou": 0.349609375, + "loss_num": 0.036376953125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 64929860, + "step": 1036 + }, + { + "epoch": 3.4509151414309485, + "grad_norm": 12.575376510620117, + "learning_rate": 5e-06, + "loss": 0.8592, + "num_input_tokens_seen": 64991228, + "step": 1037 + }, + { + "epoch": 3.4509151414309485, + "loss": 0.5446336269378662, + "loss_ce": 0.00010847233352251351, + "loss_iou": 0.134765625, + "loss_num": 0.054931640625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 64991228, + "step": 1037 + }, + { + "epoch": 3.454242928452579, + "grad_norm": 18.06930160522461, + "learning_rate": 5e-06, + "loss": 0.8297, + "num_input_tokens_seen": 65052856, + "step": 1038 + }, + { + "epoch": 3.454242928452579, + "loss": 0.7931275963783264, + "loss_ce": 0.0008912733173929155, + "loss_iou": 0.255859375, + "loss_num": 0.05615234375, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 65052856, + "step": 1038 + }, + { + "epoch": 3.4575707154742097, + "grad_norm": 38.98135757446289, + "learning_rate": 5e-06, + "loss": 0.8898, + "num_input_tokens_seen": 65115964, + "step": 1039 + }, + { + "epoch": 3.4575707154742097, + "loss": 0.8223487138748169, + "loss_ce": 8.309633994940668e-05, + "loss_iou": 0.32421875, + "loss_num": 0.034912109375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 65115964, + "step": 1039 + }, + { + "epoch": 3.4608985024958403, + "grad_norm": 24.175413131713867, + "learning_rate": 5e-06, + "loss": 0.8058, + "num_input_tokens_seen": 65178592, + "step": 1040 + }, + { + "epoch": 3.4608985024958403, + "loss": 0.6951857209205627, + "loss_ce": 0.005732577759772539, + "loss_iou": 0.2353515625, + "loss_num": 0.043701171875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 65178592, + "step": 1040 + }, + { + "epoch": 3.464226289517471, + "grad_norm": 12.473821640014648, + "learning_rate": 5e-06, + "loss": 0.4723, + "num_input_tokens_seen": 65241096, + "step": 1041 + }, + { + "epoch": 3.464226289517471, + "loss": 0.5358234643936157, + "loss_ce": 0.00042302411748096347, + "loss_iou": 0.1982421875, + "loss_num": 0.02783203125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 65241096, + "step": 1041 + }, + { + "epoch": 3.4675540765391015, + "grad_norm": 7.259097576141357, + "learning_rate": 5e-06, + "loss": 0.6688, + "num_input_tokens_seen": 65304704, + "step": 1042 + }, + { + "epoch": 3.4675540765391015, + "loss": 0.8926037549972534, + "loss_ce": 0.0016125383554026484, + "loss_iou": 0.31640625, + "loss_num": 0.0517578125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 65304704, + "step": 1042 + }, + { + "epoch": 3.470881863560732, + "grad_norm": 9.943862915039062, + "learning_rate": 5e-06, + "loss": 0.7162, + "num_input_tokens_seen": 65366024, + "step": 1043 + }, + { + "epoch": 3.470881863560732, + "loss": 0.4994853138923645, + "loss_ce": 0.00033979519503191113, + "loss_iou": 0.1591796875, + "loss_num": 0.036376953125, + "loss_xval": 0.5, + "num_input_tokens_seen": 65366024, + "step": 1043 + }, + { + "epoch": 3.4742096505823628, + "grad_norm": 11.191426277160645, + "learning_rate": 5e-06, + "loss": 0.5539, + "num_input_tokens_seen": 65426728, + "step": 1044 + }, + { + "epoch": 3.4742096505823628, + "loss": 0.4565603733062744, + "loss_ce": 0.00013946183025836945, + "loss_iou": 0.14453125, + "loss_num": 0.03369140625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 65426728, + "step": 1044 + }, + { + "epoch": 3.4775374376039934, + "grad_norm": 13.661975860595703, + "learning_rate": 5e-06, + "loss": 0.7925, + "num_input_tokens_seen": 65489952, + "step": 1045 + }, + { + "epoch": 3.4775374376039934, + "loss": 0.9170857071876526, + "loss_ce": 9.353942004963756e-05, + "loss_iou": 0.345703125, + "loss_num": 0.044921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 65489952, + "step": 1045 + }, + { + "epoch": 3.480865224625624, + "grad_norm": 12.216934204101562, + "learning_rate": 5e-06, + "loss": 0.8358, + "num_input_tokens_seen": 65553428, + "step": 1046 + }, + { + "epoch": 3.480865224625624, + "loss": 0.9110158085823059, + "loss_ce": 0.00037124729715287685, + "loss_iou": 0.353515625, + "loss_num": 0.040771484375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 65553428, + "step": 1046 + }, + { + "epoch": 3.4841930116472546, + "grad_norm": 12.635567665100098, + "learning_rate": 5e-06, + "loss": 0.7208, + "num_input_tokens_seen": 65617964, + "step": 1047 + }, + { + "epoch": 3.4841930116472546, + "loss": 0.6810963153839111, + "loss_ce": 0.0006764110876247287, + "loss_iou": 0.2490234375, + "loss_num": 0.036376953125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 65617964, + "step": 1047 + }, + { + "epoch": 3.487520798668885, + "grad_norm": 15.042474746704102, + "learning_rate": 5e-06, + "loss": 0.9429, + "num_input_tokens_seen": 65681980, + "step": 1048 + }, + { + "epoch": 3.487520798668885, + "loss": 1.086167573928833, + "loss_ce": 0.001694836188107729, + "loss_iou": 0.34765625, + "loss_num": 0.078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 65681980, + "step": 1048 + }, + { + "epoch": 3.490848585690516, + "grad_norm": 19.812339782714844, + "learning_rate": 5e-06, + "loss": 0.6651, + "num_input_tokens_seen": 65743944, + "step": 1049 + }, + { + "epoch": 3.490848585690516, + "loss": 0.5148214101791382, + "loss_ce": 5.08911361976061e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.044921875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 65743944, + "step": 1049 + }, + { + "epoch": 3.4941763727121464, + "grad_norm": 15.855693817138672, + "learning_rate": 5e-06, + "loss": 0.5802, + "num_input_tokens_seen": 65804424, + "step": 1050 + }, + { + "epoch": 3.4941763727121464, + "loss": 0.6694477796554565, + "loss_ce": 0.0002583569148555398, + "loss_iou": 0.189453125, + "loss_num": 0.05810546875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 65804424, + "step": 1050 + }, + { + "epoch": 3.497504159733777, + "grad_norm": 12.423200607299805, + "learning_rate": 5e-06, + "loss": 0.8533, + "num_input_tokens_seen": 65866992, + "step": 1051 + }, + { + "epoch": 3.497504159733777, + "loss": 0.8513665199279785, + "loss_ce": 4.8082820285344496e-05, + "loss_iou": 0.296875, + "loss_num": 0.052001953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 65866992, + "step": 1051 + }, + { + "epoch": 3.5008319467554077, + "grad_norm": 14.53134536743164, + "learning_rate": 5e-06, + "loss": 0.7817, + "num_input_tokens_seen": 65930468, + "step": 1052 + }, + { + "epoch": 3.5008319467554077, + "loss": 0.7512681484222412, + "loss_ce": 0.0007798465667292476, + "loss_iou": 0.2734375, + "loss_num": 0.040771484375, + "loss_xval": 0.75, + "num_input_tokens_seen": 65930468, + "step": 1052 + }, + { + "epoch": 3.5041597337770383, + "grad_norm": 19.564502716064453, + "learning_rate": 5e-06, + "loss": 0.8821, + "num_input_tokens_seen": 65994552, + "step": 1053 + }, + { + "epoch": 3.5041597337770383, + "loss": 1.1724635362625122, + "loss_ce": 0.0010767867788672447, + "loss_iou": 0.408203125, + "loss_num": 0.0712890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 65994552, + "step": 1053 + }, + { + "epoch": 3.507487520798669, + "grad_norm": 13.443198204040527, + "learning_rate": 5e-06, + "loss": 0.7527, + "num_input_tokens_seen": 66057744, + "step": 1054 + }, + { + "epoch": 3.507487520798669, + "loss": 0.7170261740684509, + "loss_ce": 0.00047345724306069314, + "loss_iou": 0.267578125, + "loss_num": 0.036376953125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 66057744, + "step": 1054 + }, + { + "epoch": 3.5108153078202995, + "grad_norm": 14.912182807922363, + "learning_rate": 5e-06, + "loss": 0.7259, + "num_input_tokens_seen": 66121440, + "step": 1055 + }, + { + "epoch": 3.5108153078202995, + "loss": 0.6320114135742188, + "loss_ce": 0.00017547918832860887, + "loss_iou": 0.2451171875, + "loss_num": 0.0281982421875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 66121440, + "step": 1055 + }, + { + "epoch": 3.51414309484193, + "grad_norm": 24.48227310180664, + "learning_rate": 5e-06, + "loss": 0.8422, + "num_input_tokens_seen": 66184688, + "step": 1056 + }, + { + "epoch": 3.51414309484193, + "loss": 0.7655551433563232, + "loss_ce": 0.0006625619134865701, + "loss_iou": 0.263671875, + "loss_num": 0.04736328125, + "loss_xval": 0.765625, + "num_input_tokens_seen": 66184688, + "step": 1056 + }, + { + "epoch": 3.5174708818635607, + "grad_norm": 12.773367881774902, + "learning_rate": 5e-06, + "loss": 0.7156, + "num_input_tokens_seen": 66247292, + "step": 1057 + }, + { + "epoch": 3.5174708818635607, + "loss": 0.5347628593444824, + "loss_ce": 0.00046112615382298827, + "loss_iou": 0.1669921875, + "loss_num": 0.039794921875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 66247292, + "step": 1057 + }, + { + "epoch": 3.5207986688851913, + "grad_norm": 11.14990234375, + "learning_rate": 5e-06, + "loss": 0.5024, + "num_input_tokens_seen": 66310532, + "step": 1058 + }, + { + "epoch": 3.5207986688851913, + "loss": 0.6813678741455078, + "loss_ce": 0.0007648678729310632, + "loss_iou": 0.236328125, + "loss_num": 0.04150390625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 66310532, + "step": 1058 + }, + { + "epoch": 3.524126455906822, + "grad_norm": 16.360013961791992, + "learning_rate": 5e-06, + "loss": 0.9234, + "num_input_tokens_seen": 66373648, + "step": 1059 + }, + { + "epoch": 3.524126455906822, + "loss": 0.9731760025024414, + "loss_ce": 3.146879680571146e-05, + "loss_iou": 0.345703125, + "loss_num": 0.056396484375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 66373648, + "step": 1059 + }, + { + "epoch": 3.5274542429284526, + "grad_norm": 9.46167278289795, + "learning_rate": 5e-06, + "loss": 0.4595, + "num_input_tokens_seen": 66436316, + "step": 1060 + }, + { + "epoch": 3.5274542429284526, + "loss": 0.5476481914520264, + "loss_ce": 4.0785591409076005e-05, + "loss_iou": 0.197265625, + "loss_num": 0.03076171875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 66436316, + "step": 1060 + }, + { + "epoch": 3.530782029950083, + "grad_norm": 14.10741901397705, + "learning_rate": 5e-06, + "loss": 0.9308, + "num_input_tokens_seen": 66498344, + "step": 1061 + }, + { + "epoch": 3.530782029950083, + "loss": 0.9000440835952759, + "loss_ce": 0.002461038064211607, + "loss_iou": 0.310546875, + "loss_num": 0.054931640625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 66498344, + "step": 1061 + }, + { + "epoch": 3.534109816971714, + "grad_norm": 23.609783172607422, + "learning_rate": 5e-06, + "loss": 0.9126, + "num_input_tokens_seen": 66562436, + "step": 1062 + }, + { + "epoch": 3.534109816971714, + "loss": 0.7102041840553284, + "loss_ce": 0.00036530819488689303, + "loss_iou": 0.248046875, + "loss_num": 0.04248046875, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 66562436, + "step": 1062 + }, + { + "epoch": 3.5374376039933444, + "grad_norm": 20.392234802246094, + "learning_rate": 5e-06, + "loss": 0.5586, + "num_input_tokens_seen": 66624320, + "step": 1063 + }, + { + "epoch": 3.5374376039933444, + "loss": 0.573874831199646, + "loss_ce": 0.0008768028346821666, + "loss_iou": 0.1318359375, + "loss_num": 0.061767578125, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 66624320, + "step": 1063 + }, + { + "epoch": 3.540765391014975, + "grad_norm": 14.3955078125, + "learning_rate": 5e-06, + "loss": 0.772, + "num_input_tokens_seen": 66687376, + "step": 1064 + }, + { + "epoch": 3.540765391014975, + "loss": 0.6318628787994385, + "loss_ce": 2.6940453608403914e-05, + "loss_iou": 0.1611328125, + "loss_num": 0.061767578125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 66687376, + "step": 1064 + }, + { + "epoch": 3.5440931780366056, + "grad_norm": 16.033170700073242, + "learning_rate": 5e-06, + "loss": 0.6708, + "num_input_tokens_seen": 66751300, + "step": 1065 + }, + { + "epoch": 3.5440931780366056, + "loss": 0.6771686673164368, + "loss_ce": 0.0011432725004851818, + "loss_iou": 0.25390625, + "loss_num": 0.033935546875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 66751300, + "step": 1065 + }, + { + "epoch": 3.5474209650582362, + "grad_norm": 19.82112693786621, + "learning_rate": 5e-06, + "loss": 1.0317, + "num_input_tokens_seen": 66814000, + "step": 1066 + }, + { + "epoch": 3.5474209650582362, + "loss": 1.0494093894958496, + "loss_ce": 9.300906822318211e-05, + "loss_iou": 0.333984375, + "loss_num": 0.07568359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 66814000, + "step": 1066 + }, + { + "epoch": 3.550748752079867, + "grad_norm": 25.609535217285156, + "learning_rate": 5e-06, + "loss": 0.7597, + "num_input_tokens_seen": 66877452, + "step": 1067 + }, + { + "epoch": 3.550748752079867, + "loss": 0.6766536831855774, + "loss_ce": 0.0016658806707710028, + "loss_iou": 0.240234375, + "loss_num": 0.0390625, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 66877452, + "step": 1067 + }, + { + "epoch": 3.5540765391014975, + "grad_norm": 17.325336456298828, + "learning_rate": 5e-06, + "loss": 0.6623, + "num_input_tokens_seen": 66937972, + "step": 1068 + }, + { + "epoch": 3.5540765391014975, + "loss": 0.830532431602478, + "loss_ce": 0.0003322733100503683, + "loss_iou": 0.21875, + "loss_num": 0.07861328125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 66937972, + "step": 1068 + }, + { + "epoch": 3.557404326123128, + "grad_norm": 22.54002571105957, + "learning_rate": 5e-06, + "loss": 0.8791, + "num_input_tokens_seen": 67000040, + "step": 1069 + }, + { + "epoch": 3.557404326123128, + "loss": 1.1053575277328491, + "loss_ce": 0.00037706823786720634, + "loss_iou": 0.34375, + "loss_num": 0.08349609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 67000040, + "step": 1069 + }, + { + "epoch": 3.5607321131447587, + "grad_norm": 9.65036678314209, + "learning_rate": 5e-06, + "loss": 0.699, + "num_input_tokens_seen": 67062404, + "step": 1070 + }, + { + "epoch": 3.5607321131447587, + "loss": 0.52567458152771, + "loss_ce": 3.979210669058375e-05, + "loss_iou": 0.158203125, + "loss_num": 0.041748046875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 67062404, + "step": 1070 + }, + { + "epoch": 3.5640599001663893, + "grad_norm": 9.910517692565918, + "learning_rate": 5e-06, + "loss": 0.7048, + "num_input_tokens_seen": 67124276, + "step": 1071 + }, + { + "epoch": 3.5640599001663893, + "loss": 0.6014664173126221, + "loss_ce": 2.5981264116126113e-05, + "loss_iou": 0.1875, + "loss_num": 0.045166015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 67124276, + "step": 1071 + }, + { + "epoch": 3.56738768718802, + "grad_norm": 15.538228034973145, + "learning_rate": 5e-06, + "loss": 0.9398, + "num_input_tokens_seen": 67187400, + "step": 1072 + }, + { + "epoch": 3.56738768718802, + "loss": 1.0962104797363281, + "loss_ce": 1.9056995370192453e-05, + "loss_iou": 0.376953125, + "loss_num": 0.068359375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 67187400, + "step": 1072 + }, + { + "epoch": 3.5707154742096505, + "grad_norm": 9.4950532913208, + "learning_rate": 5e-06, + "loss": 0.879, + "num_input_tokens_seen": 67249424, + "step": 1073 + }, + { + "epoch": 3.5707154742096505, + "loss": 0.9734940528869629, + "loss_ce": 0.00010540573566686362, + "loss_iou": 0.306640625, + "loss_num": 0.07177734375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 67249424, + "step": 1073 + }, + { + "epoch": 3.574043261231281, + "grad_norm": 18.527990341186523, + "learning_rate": 5e-06, + "loss": 0.8317, + "num_input_tokens_seen": 67313036, + "step": 1074 + }, + { + "epoch": 3.574043261231281, + "loss": 0.6008124947547913, + "loss_ce": 0.0003486633358988911, + "loss_iou": 0.185546875, + "loss_num": 0.045654296875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 67313036, + "step": 1074 + }, + { + "epoch": 3.5773710482529117, + "grad_norm": 26.33303451538086, + "learning_rate": 5e-06, + "loss": 0.8054, + "num_input_tokens_seen": 67375220, + "step": 1075 + }, + { + "epoch": 3.5773710482529117, + "loss": 0.8885135054588318, + "loss_ce": 0.0008181866724044085, + "loss_iou": 0.306640625, + "loss_num": 0.054931640625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 67375220, + "step": 1075 + }, + { + "epoch": 3.5806988352745424, + "grad_norm": 13.492464065551758, + "learning_rate": 5e-06, + "loss": 0.7742, + "num_input_tokens_seen": 67436772, + "step": 1076 + }, + { + "epoch": 3.5806988352745424, + "loss": 0.775583803653717, + "loss_ce": 0.00019318400882184505, + "loss_iou": 0.2294921875, + "loss_num": 0.06298828125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 67436772, + "step": 1076 + }, + { + "epoch": 3.584026622296173, + "grad_norm": 13.470839500427246, + "learning_rate": 5e-06, + "loss": 0.7942, + "num_input_tokens_seen": 67498376, + "step": 1077 + }, + { + "epoch": 3.584026622296173, + "loss": 0.5680028200149536, + "loss_ce": 9.682446034275927e-06, + "loss_iou": 0.1904296875, + "loss_num": 0.03759765625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 67498376, + "step": 1077 + }, + { + "epoch": 3.5873544093178036, + "grad_norm": 11.887737274169922, + "learning_rate": 5e-06, + "loss": 0.7184, + "num_input_tokens_seen": 67559900, + "step": 1078 + }, + { + "epoch": 3.5873544093178036, + "loss": 0.8478894233703613, + "loss_ce": 0.0009656131733208895, + "loss_iou": 0.248046875, + "loss_num": 0.0703125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 67559900, + "step": 1078 + }, + { + "epoch": 3.590682196339434, + "grad_norm": 32.69264602661133, + "learning_rate": 5e-06, + "loss": 0.7889, + "num_input_tokens_seen": 67622724, + "step": 1079 + }, + { + "epoch": 3.590682196339434, + "loss": 0.760546863079071, + "loss_ce": 4.88169098389335e-05, + "loss_iou": 0.259765625, + "loss_num": 0.04833984375, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 67622724, + "step": 1079 + }, + { + "epoch": 3.594009983361065, + "grad_norm": 15.543495178222656, + "learning_rate": 5e-06, + "loss": 0.6654, + "num_input_tokens_seen": 67683704, + "step": 1080 + }, + { + "epoch": 3.594009983361065, + "loss": 0.6395562291145325, + "loss_ce": 2.9867431294405833e-05, + "loss_iou": 0.1640625, + "loss_num": 0.06201171875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 67683704, + "step": 1080 + }, + { + "epoch": 3.5973377703826954, + "grad_norm": 22.80522918701172, + "learning_rate": 5e-06, + "loss": 0.8463, + "num_input_tokens_seen": 67746952, + "step": 1081 + }, + { + "epoch": 3.5973377703826954, + "loss": 0.8648264408111572, + "loss_ce": 0.000446604797616601, + "loss_iou": 0.30078125, + "loss_num": 0.052490234375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 67746952, + "step": 1081 + }, + { + "epoch": 3.600665557404326, + "grad_norm": 18.392467498779297, + "learning_rate": 5e-06, + "loss": 0.6663, + "num_input_tokens_seen": 67809764, + "step": 1082 + }, + { + "epoch": 3.600665557404326, + "loss": 0.741300106048584, + "loss_ce": 0.0014319655019789934, + "loss_iou": 0.271484375, + "loss_num": 0.039306640625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 67809764, + "step": 1082 + }, + { + "epoch": 3.6039933444259566, + "grad_norm": 10.33826732635498, + "learning_rate": 5e-06, + "loss": 0.6008, + "num_input_tokens_seen": 67871348, + "step": 1083 + }, + { + "epoch": 3.6039933444259566, + "loss": 0.4612269103527069, + "loss_ce": 4.5263179345056415e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.044677734375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 67871348, + "step": 1083 + }, + { + "epoch": 3.6073211314475873, + "grad_norm": 26.75817108154297, + "learning_rate": 5e-06, + "loss": 0.7098, + "num_input_tokens_seen": 67934820, + "step": 1084 + }, + { + "epoch": 3.6073211314475873, + "loss": 0.7330468893051147, + "loss_ce": 1.4662471585324965e-05, + "loss_iou": 0.279296875, + "loss_num": 0.03515625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 67934820, + "step": 1084 + }, + { + "epoch": 3.610648918469218, + "grad_norm": 14.098214149475098, + "learning_rate": 5e-06, + "loss": 0.7828, + "num_input_tokens_seen": 67996592, + "step": 1085 + }, + { + "epoch": 3.610648918469218, + "loss": 0.6801908016204834, + "loss_ce": 1.5059520592330955e-05, + "loss_iou": 0.228515625, + "loss_num": 0.044677734375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 67996592, + "step": 1085 + }, + { + "epoch": 3.6139767054908485, + "grad_norm": 11.56197452545166, + "learning_rate": 5e-06, + "loss": 0.7931, + "num_input_tokens_seen": 68060428, + "step": 1086 + }, + { + "epoch": 3.6139767054908485, + "loss": 0.7243208885192871, + "loss_ce": 0.0006881133886054158, + "loss_iou": 0.2236328125, + "loss_num": 0.05517578125, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 68060428, + "step": 1086 + }, + { + "epoch": 3.617304492512479, + "grad_norm": 9.20474910736084, + "learning_rate": 5e-06, + "loss": 0.7987, + "num_input_tokens_seen": 68120428, + "step": 1087 + }, + { + "epoch": 3.617304492512479, + "loss": 0.9417086243629456, + "loss_ce": 0.00079060293501243, + "loss_iou": 0.28515625, + "loss_num": 0.07421875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 68120428, + "step": 1087 + }, + { + "epoch": 3.6206322795341097, + "grad_norm": 23.553590774536133, + "learning_rate": 5e-06, + "loss": 0.7281, + "num_input_tokens_seen": 68183092, + "step": 1088 + }, + { + "epoch": 3.6206322795341097, + "loss": 0.6970406770706177, + "loss_ce": 1.9182030882802792e-05, + "loss_iou": 0.177734375, + "loss_num": 0.068359375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 68183092, + "step": 1088 + }, + { + "epoch": 3.6239600665557403, + "grad_norm": 16.959150314331055, + "learning_rate": 5e-06, + "loss": 0.6518, + "num_input_tokens_seen": 68245676, + "step": 1089 + }, + { + "epoch": 3.6239600665557403, + "loss": 0.6579831838607788, + "loss_ce": 2.4194079742301255e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.04345703125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 68245676, + "step": 1089 + }, + { + "epoch": 3.627287853577371, + "grad_norm": 47.85329055786133, + "learning_rate": 5e-06, + "loss": 0.7832, + "num_input_tokens_seen": 68308340, + "step": 1090 + }, + { + "epoch": 3.627287853577371, + "loss": 0.8862950801849365, + "loss_ce": 0.00030875191441737115, + "loss_iou": 0.328125, + "loss_num": 0.0458984375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 68308340, + "step": 1090 + }, + { + "epoch": 3.6306156405990015, + "grad_norm": 13.920936584472656, + "learning_rate": 5e-06, + "loss": 0.7249, + "num_input_tokens_seen": 68370692, + "step": 1091 + }, + { + "epoch": 3.6306156405990015, + "loss": 0.8974728584289551, + "loss_ce": 0.0002561031433288008, + "loss_iou": 0.33203125, + "loss_num": 0.047119140625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 68370692, + "step": 1091 + }, + { + "epoch": 3.633943427620632, + "grad_norm": 9.428908348083496, + "learning_rate": 5e-06, + "loss": 0.6884, + "num_input_tokens_seen": 68432480, + "step": 1092 + }, + { + "epoch": 3.633943427620632, + "loss": 0.6953365802764893, + "loss_ce": 2.4052456865319982e-05, + "loss_iou": 0.232421875, + "loss_num": 0.04638671875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 68432480, + "step": 1092 + }, + { + "epoch": 3.6372712146422628, + "grad_norm": 15.467686653137207, + "learning_rate": 5e-06, + "loss": 0.6283, + "num_input_tokens_seen": 68494344, + "step": 1093 + }, + { + "epoch": 3.6372712146422628, + "loss": 0.839697539806366, + "loss_ce": 0.0005862598773092031, + "loss_iou": 0.333984375, + "loss_num": 0.033935546875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 68494344, + "step": 1093 + }, + { + "epoch": 3.6405990016638934, + "grad_norm": 13.4644775390625, + "learning_rate": 5e-06, + "loss": 0.9332, + "num_input_tokens_seen": 68556756, + "step": 1094 + }, + { + "epoch": 3.6405990016638934, + "loss": 1.0598011016845703, + "loss_ce": 0.00023084873100742698, + "loss_iou": 0.365234375, + "loss_num": 0.06591796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 68556756, + "step": 1094 + }, + { + "epoch": 3.643926788685524, + "grad_norm": 7.175938129425049, + "learning_rate": 5e-06, + "loss": 0.7075, + "num_input_tokens_seen": 68616224, + "step": 1095 + }, + { + "epoch": 3.643926788685524, + "loss": 0.7629657983779907, + "loss_ce": 2.6325005819671787e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.057373046875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 68616224, + "step": 1095 + }, + { + "epoch": 3.6472545757071546, + "grad_norm": 18.110021591186523, + "learning_rate": 5e-06, + "loss": 0.7513, + "num_input_tokens_seen": 68679384, + "step": 1096 + }, + { + "epoch": 3.6472545757071546, + "loss": 0.542603611946106, + "loss_ce": 0.00012313880142755806, + "loss_iou": 0.1650390625, + "loss_num": 0.04248046875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 68679384, + "step": 1096 + }, + { + "epoch": 3.6505823627287852, + "grad_norm": 24.87841033935547, + "learning_rate": 5e-06, + "loss": 0.897, + "num_input_tokens_seen": 68740984, + "step": 1097 + }, + { + "epoch": 3.6505823627287852, + "loss": 0.7363914251327515, + "loss_ce": 0.0005515533266589046, + "loss_iou": 0.28125, + "loss_num": 0.03466796875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 68740984, + "step": 1097 + }, + { + "epoch": 3.653910149750416, + "grad_norm": 14.83497428894043, + "learning_rate": 5e-06, + "loss": 0.4878, + "num_input_tokens_seen": 68803040, + "step": 1098 + }, + { + "epoch": 3.653910149750416, + "loss": 0.4231252670288086, + "loss_ce": 2.955349555122666e-05, + "loss_iou": 0.1171875, + "loss_num": 0.037841796875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 68803040, + "step": 1098 + }, + { + "epoch": 3.6572379367720464, + "grad_norm": 10.35799503326416, + "learning_rate": 5e-06, + "loss": 0.7078, + "num_input_tokens_seen": 68866936, + "step": 1099 + }, + { + "epoch": 3.6572379367720464, + "loss": 0.56097412109375, + "loss_ce": 0.0014343142975121737, + "loss_iou": 0.173828125, + "loss_num": 0.04248046875, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 68866936, + "step": 1099 + }, + { + "epoch": 3.660565723793677, + "grad_norm": 20.12316131591797, + "learning_rate": 5e-06, + "loss": 0.6702, + "num_input_tokens_seen": 68927524, + "step": 1100 + }, + { + "epoch": 3.660565723793677, + "loss": 0.9203138947486877, + "loss_ce": 0.0006362266722135246, + "loss_iou": 0.306640625, + "loss_num": 0.06103515625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 68927524, + "step": 1100 + }, + { + "epoch": 3.6638935108153077, + "grad_norm": 22.879741668701172, + "learning_rate": 5e-06, + "loss": 0.8751, + "num_input_tokens_seen": 68989620, + "step": 1101 + }, + { + "epoch": 3.6638935108153077, + "loss": 0.8814558982849121, + "loss_ce": 0.017686322331428528, + "loss_iou": 0.333984375, + "loss_num": 0.039306640625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 68989620, + "step": 1101 + }, + { + "epoch": 3.6672212978369383, + "grad_norm": 13.441774368286133, + "learning_rate": 5e-06, + "loss": 0.7847, + "num_input_tokens_seen": 69051672, + "step": 1102 + }, + { + "epoch": 3.6672212978369383, + "loss": 0.9437904357910156, + "loss_ce": 0.0003089719684794545, + "loss_iou": 0.33984375, + "loss_num": 0.052978515625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 69051672, + "step": 1102 + }, + { + "epoch": 3.670549084858569, + "grad_norm": 15.477404594421387, + "learning_rate": 5e-06, + "loss": 0.7314, + "num_input_tokens_seen": 69115388, + "step": 1103 + }, + { + "epoch": 3.670549084858569, + "loss": 0.6018927097320557, + "loss_ce": 0.0008184734033420682, + "loss_iou": 0.2138671875, + "loss_num": 0.03466796875, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 69115388, + "step": 1103 + }, + { + "epoch": 3.6738768718801995, + "grad_norm": 19.463781356811523, + "learning_rate": 5e-06, + "loss": 0.7772, + "num_input_tokens_seen": 69178392, + "step": 1104 + }, + { + "epoch": 3.6738768718801995, + "loss": 0.7169560194015503, + "loss_ce": 0.00028123901574872434, + "loss_iou": 0.1845703125, + "loss_num": 0.0693359375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 69178392, + "step": 1104 + }, + { + "epoch": 3.67720465890183, + "grad_norm": 7.575782775878906, + "learning_rate": 5e-06, + "loss": 0.5294, + "num_input_tokens_seen": 69239676, + "step": 1105 + }, + { + "epoch": 3.67720465890183, + "loss": 0.531362771987915, + "loss_ce": 0.0006010084762237966, + "loss_iou": 0.142578125, + "loss_num": 0.049072265625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 69239676, + "step": 1105 + }, + { + "epoch": 3.6805324459234607, + "grad_norm": 14.393789291381836, + "learning_rate": 5e-06, + "loss": 0.7182, + "num_input_tokens_seen": 69301396, + "step": 1106 + }, + { + "epoch": 3.6805324459234607, + "loss": 0.6370840072631836, + "loss_ce": 0.0003652620653156191, + "loss_iou": 0.177734375, + "loss_num": 0.056396484375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 69301396, + "step": 1106 + }, + { + "epoch": 3.6838602329450914, + "grad_norm": 11.867156028747559, + "learning_rate": 5e-06, + "loss": 0.9087, + "num_input_tokens_seen": 69362984, + "step": 1107 + }, + { + "epoch": 3.6838602329450914, + "loss": 0.5029775500297546, + "loss_ce": 0.00029203054145909846, + "loss_iou": 0.1015625, + "loss_num": 0.059814453125, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 69362984, + "step": 1107 + }, + { + "epoch": 3.687188019966722, + "grad_norm": 14.382938385009766, + "learning_rate": 5e-06, + "loss": 0.7059, + "num_input_tokens_seen": 69425984, + "step": 1108 + }, + { + "epoch": 3.687188019966722, + "loss": 0.7315462827682495, + "loss_ce": 0.00034508475800976157, + "loss_iou": 0.201171875, + "loss_num": 0.06591796875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 69425984, + "step": 1108 + }, + { + "epoch": 3.690515806988353, + "grad_norm": 21.43218994140625, + "learning_rate": 5e-06, + "loss": 0.7946, + "num_input_tokens_seen": 69489740, + "step": 1109 + }, + { + "epoch": 3.690515806988353, + "loss": 0.7349926829338074, + "loss_ce": 0.0006176835740916431, + "loss_iou": 0.2060546875, + "loss_num": 0.064453125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 69489740, + "step": 1109 + }, + { + "epoch": 3.6938435940099836, + "grad_norm": 16.769193649291992, + "learning_rate": 5e-06, + "loss": 0.6175, + "num_input_tokens_seen": 69550704, + "step": 1110 + }, + { + "epoch": 3.6938435940099836, + "loss": 0.634501576423645, + "loss_ce": 0.0007125326665118337, + "loss_iou": 0.17578125, + "loss_num": 0.056640625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 69550704, + "step": 1110 + }, + { + "epoch": 3.6971713810316142, + "grad_norm": 32.770267486572266, + "learning_rate": 5e-06, + "loss": 0.9464, + "num_input_tokens_seen": 69614880, + "step": 1111 + }, + { + "epoch": 3.6971713810316142, + "loss": 1.0868799686431885, + "loss_ce": 0.0009425695170648396, + "loss_iou": 0.390625, + "loss_num": 0.061279296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 69614880, + "step": 1111 + }, + { + "epoch": 3.700499168053245, + "grad_norm": 43.10527420043945, + "learning_rate": 5e-06, + "loss": 0.9164, + "num_input_tokens_seen": 69677600, + "step": 1112 + }, + { + "epoch": 3.700499168053245, + "loss": 0.7824146151542664, + "loss_ce": 4.9382551878807135e-06, + "loss_iou": 0.259765625, + "loss_num": 0.05224609375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 69677600, + "step": 1112 + }, + { + "epoch": 3.7038269550748755, + "grad_norm": 22.692411422729492, + "learning_rate": 5e-06, + "loss": 0.974, + "num_input_tokens_seen": 69739600, + "step": 1113 + }, + { + "epoch": 3.7038269550748755, + "loss": 0.9298710823059082, + "loss_ce": 0.0009159321198239923, + "loss_iou": 0.283203125, + "loss_num": 0.072265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 69739600, + "step": 1113 + }, + { + "epoch": 3.707154742096506, + "grad_norm": 10.36915111541748, + "learning_rate": 5e-06, + "loss": 0.6903, + "num_input_tokens_seen": 69800996, + "step": 1114 + }, + { + "epoch": 3.707154742096506, + "loss": 0.6277588605880737, + "loss_ce": 0.000439511495642364, + "loss_iou": 0.2119140625, + "loss_num": 0.041015625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 69800996, + "step": 1114 + }, + { + "epoch": 3.7104825291181367, + "grad_norm": 15.28381061553955, + "learning_rate": 5e-06, + "loss": 0.8741, + "num_input_tokens_seen": 69863736, + "step": 1115 + }, + { + "epoch": 3.7104825291181367, + "loss": 0.9558818340301514, + "loss_ce": 0.0011699418537318707, + "loss_iou": 0.3125, + "loss_num": 0.0654296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 69863736, + "step": 1115 + }, + { + "epoch": 3.7138103161397673, + "grad_norm": 11.837628364562988, + "learning_rate": 5e-06, + "loss": 0.8104, + "num_input_tokens_seen": 69927776, + "step": 1116 + }, + { + "epoch": 3.7138103161397673, + "loss": 0.8550588488578796, + "loss_ce": 0.0003225263499189168, + "loss_iou": 0.302734375, + "loss_num": 0.049560546875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 69927776, + "step": 1116 + }, + { + "epoch": 3.717138103161398, + "grad_norm": 20.32373046875, + "learning_rate": 5e-06, + "loss": 0.8018, + "num_input_tokens_seen": 69991752, + "step": 1117 + }, + { + "epoch": 3.717138103161398, + "loss": 0.7404411435127258, + "loss_ce": 0.0006950714159756899, + "loss_iou": 0.2734375, + "loss_num": 0.038330078125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 69991752, + "step": 1117 + }, + { + "epoch": 3.7204658901830285, + "grad_norm": 14.117117881774902, + "learning_rate": 5e-06, + "loss": 0.7821, + "num_input_tokens_seen": 70055220, + "step": 1118 + }, + { + "epoch": 3.7204658901830285, + "loss": 1.1473777294158936, + "loss_ce": 0.0010153691982850432, + "loss_iou": 0.42578125, + "loss_num": 0.058837890625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 70055220, + "step": 1118 + }, + { + "epoch": 3.723793677204659, + "grad_norm": 11.328316688537598, + "learning_rate": 5e-06, + "loss": 0.6542, + "num_input_tokens_seen": 70117140, + "step": 1119 + }, + { + "epoch": 3.723793677204659, + "loss": 0.5794090032577515, + "loss_ce": 0.00030745528056286275, + "loss_iou": 0.1748046875, + "loss_num": 0.0458984375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 70117140, + "step": 1119 + }, + { + "epoch": 3.7271214642262898, + "grad_norm": 9.209413528442383, + "learning_rate": 5e-06, + "loss": 0.6797, + "num_input_tokens_seen": 70179304, + "step": 1120 + }, + { + "epoch": 3.7271214642262898, + "loss": 0.6281410455703735, + "loss_ce": 0.0010658506071195006, + "loss_iou": 0.1904296875, + "loss_num": 0.049072265625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 70179304, + "step": 1120 + }, + { + "epoch": 3.7304492512479204, + "grad_norm": 12.064332008361816, + "learning_rate": 5e-06, + "loss": 0.682, + "num_input_tokens_seen": 70242640, + "step": 1121 + }, + { + "epoch": 3.7304492512479204, + "loss": 0.6883260011672974, + "loss_ce": 0.00033775315387174487, + "loss_iou": 0.2431640625, + "loss_num": 0.04052734375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 70242640, + "step": 1121 + }, + { + "epoch": 3.733777038269551, + "grad_norm": 21.927749633789062, + "learning_rate": 5e-06, + "loss": 0.9156, + "num_input_tokens_seen": 70306468, + "step": 1122 + }, + { + "epoch": 3.733777038269551, + "loss": 0.9951090216636658, + "loss_ce": 0.00048012335901148617, + "loss_iou": 0.3515625, + "loss_num": 0.058349609375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 70306468, + "step": 1122 + }, + { + "epoch": 3.7371048252911816, + "grad_norm": 43.249393463134766, + "learning_rate": 5e-06, + "loss": 1.0435, + "num_input_tokens_seen": 70369504, + "step": 1123 + }, + { + "epoch": 3.7371048252911816, + "loss": 1.0370984077453613, + "loss_ce": 0.0014538050163537264, + "loss_iou": 0.306640625, + "loss_num": 0.08447265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 70369504, + "step": 1123 + }, + { + "epoch": 3.740432612312812, + "grad_norm": 17.981069564819336, + "learning_rate": 5e-06, + "loss": 0.6572, + "num_input_tokens_seen": 70432836, + "step": 1124 + }, + { + "epoch": 3.740432612312812, + "loss": 0.7932811975479126, + "loss_ce": 6.828659388702363e-05, + "loss_iou": 0.2734375, + "loss_num": 0.049072265625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 70432836, + "step": 1124 + }, + { + "epoch": 3.743760399334443, + "grad_norm": 13.31531047821045, + "learning_rate": 5e-06, + "loss": 0.7378, + "num_input_tokens_seen": 70495112, + "step": 1125 + }, + { + "epoch": 3.743760399334443, + "loss": 0.9534047245979309, + "loss_ce": 0.001012163469567895, + "loss_iou": 0.357421875, + "loss_num": 0.04736328125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 70495112, + "step": 1125 + }, + { + "epoch": 3.7470881863560734, + "grad_norm": 13.23071002960205, + "learning_rate": 5e-06, + "loss": 1.0306, + "num_input_tokens_seen": 70557332, + "step": 1126 + }, + { + "epoch": 3.7470881863560734, + "loss": 1.243412971496582, + "loss_ce": 0.0004931364674121141, + "loss_iou": 0.4375, + "loss_num": 0.07373046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 70557332, + "step": 1126 + }, + { + "epoch": 3.750415973377704, + "grad_norm": 11.754276275634766, + "learning_rate": 5e-06, + "loss": 0.7345, + "num_input_tokens_seen": 70619872, + "step": 1127 + }, + { + "epoch": 3.750415973377704, + "loss": 0.9770193099975586, + "loss_ce": 0.0011892361799255013, + "loss_iou": 0.310546875, + "loss_num": 0.0703125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 70619872, + "step": 1127 + }, + { + "epoch": 3.7537437603993347, + "grad_norm": 16.051631927490234, + "learning_rate": 5e-06, + "loss": 1.002, + "num_input_tokens_seen": 70681060, + "step": 1128 + }, + { + "epoch": 3.7537437603993347, + "loss": 1.000293493270874, + "loss_ce": 0.002002517692744732, + "loss_iou": 0.3046875, + "loss_num": 0.07763671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 70681060, + "step": 1128 + }, + { + "epoch": 3.7570715474209653, + "grad_norm": 12.550768852233887, + "learning_rate": 5e-06, + "loss": 0.7239, + "num_input_tokens_seen": 70742176, + "step": 1129 + }, + { + "epoch": 3.7570715474209653, + "loss": 0.8635753393173218, + "loss_ce": 0.000538200605660677, + "loss_iou": 0.29296875, + "loss_num": 0.05517578125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 70742176, + "step": 1129 + }, + { + "epoch": 3.760399334442596, + "grad_norm": 14.114124298095703, + "learning_rate": 5e-06, + "loss": 0.6735, + "num_input_tokens_seen": 70804608, + "step": 1130 + }, + { + "epoch": 3.760399334442596, + "loss": 0.4799818992614746, + "loss_ce": 0.0007338316063396633, + "loss_iou": 0.1591796875, + "loss_num": 0.031982421875, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 70804608, + "step": 1130 + }, + { + "epoch": 3.7637271214642265, + "grad_norm": 21.7076473236084, + "learning_rate": 5e-06, + "loss": 0.7799, + "num_input_tokens_seen": 70867088, + "step": 1131 + }, + { + "epoch": 3.7637271214642265, + "loss": 0.4688870310783386, + "loss_ce": 0.00016756025434006006, + "loss_iou": 0.1640625, + "loss_num": 0.0281982421875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 70867088, + "step": 1131 + }, + { + "epoch": 3.767054908485857, + "grad_norm": 10.689167022705078, + "learning_rate": 5e-06, + "loss": 0.6789, + "num_input_tokens_seen": 70928940, + "step": 1132 + }, + { + "epoch": 3.767054908485857, + "loss": 0.9793890714645386, + "loss_ce": 0.0003851721412502229, + "loss_iou": 0.373046875, + "loss_num": 0.046875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 70928940, + "step": 1132 + }, + { + "epoch": 3.7703826955074877, + "grad_norm": 18.24936866760254, + "learning_rate": 5e-06, + "loss": 0.8163, + "num_input_tokens_seen": 70989924, + "step": 1133 + }, + { + "epoch": 3.7703826955074877, + "loss": 0.7339310646057129, + "loss_ce": 4.4314670958556235e-05, + "loss_iou": 0.166015625, + "loss_num": 0.08056640625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 70989924, + "step": 1133 + }, + { + "epoch": 3.7737104825291183, + "grad_norm": 22.505451202392578, + "learning_rate": 5e-06, + "loss": 0.5424, + "num_input_tokens_seen": 71051320, + "step": 1134 + }, + { + "epoch": 3.7737104825291183, + "loss": 0.7140070199966431, + "loss_ce": 0.0006280855741351843, + "loss_iou": 0.255859375, + "loss_num": 0.040771484375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 71051320, + "step": 1134 + }, + { + "epoch": 3.777038269550749, + "grad_norm": 8.693553924560547, + "learning_rate": 5e-06, + "loss": 0.5119, + "num_input_tokens_seen": 71112332, + "step": 1135 + }, + { + "epoch": 3.777038269550749, + "loss": 0.5159156322479248, + "loss_ce": 3.124218710581772e-05, + "loss_iou": 0.19140625, + "loss_num": 0.0264892578125, + "loss_xval": 0.515625, + "num_input_tokens_seen": 71112332, + "step": 1135 + }, + { + "epoch": 3.7803660565723796, + "grad_norm": 12.785008430480957, + "learning_rate": 5e-06, + "loss": 0.6925, + "num_input_tokens_seen": 71175924, + "step": 1136 + }, + { + "epoch": 3.7803660565723796, + "loss": 0.6734526753425598, + "loss_ce": 0.0007231989875435829, + "loss_iou": 0.2373046875, + "loss_num": 0.03955078125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 71175924, + "step": 1136 + }, + { + "epoch": 3.78369384359401, + "grad_norm": 11.817228317260742, + "learning_rate": 5e-06, + "loss": 0.9368, + "num_input_tokens_seen": 71238860, + "step": 1137 + }, + { + "epoch": 3.78369384359401, + "loss": 0.8826572895050049, + "loss_ce": 0.00021094588737469167, + "loss_iou": 0.271484375, + "loss_num": 0.06787109375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 71238860, + "step": 1137 + }, + { + "epoch": 3.787021630615641, + "grad_norm": 13.229910850524902, + "learning_rate": 5e-06, + "loss": 0.6284, + "num_input_tokens_seen": 71301228, + "step": 1138 + }, + { + "epoch": 3.787021630615641, + "loss": 0.7485630512237549, + "loss_ce": 0.0002720615011639893, + "loss_iou": 0.271484375, + "loss_num": 0.041015625, + "loss_xval": 0.75, + "num_input_tokens_seen": 71301228, + "step": 1138 + }, + { + "epoch": 3.7903494176372714, + "grad_norm": 39.25370788574219, + "learning_rate": 5e-06, + "loss": 0.7781, + "num_input_tokens_seen": 71364208, + "step": 1139 + }, + { + "epoch": 3.7903494176372714, + "loss": 0.860191822052002, + "loss_ce": 8.443903061561286e-05, + "loss_iou": 0.314453125, + "loss_num": 0.04638671875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 71364208, + "step": 1139 + }, + { + "epoch": 3.793677204658902, + "grad_norm": 8.552867889404297, + "learning_rate": 5e-06, + "loss": 0.6363, + "num_input_tokens_seen": 71425864, + "step": 1140 + }, + { + "epoch": 3.793677204658902, + "loss": 0.6165646314620972, + "loss_ce": 0.000842000765260309, + "loss_iou": 0.140625, + "loss_num": 0.06689453125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 71425864, + "step": 1140 + }, + { + "epoch": 3.7970049916805326, + "grad_norm": 7.005518436431885, + "learning_rate": 5e-06, + "loss": 0.7277, + "num_input_tokens_seen": 71489608, + "step": 1141 + }, + { + "epoch": 3.7970049916805326, + "loss": 0.7442701458930969, + "loss_ce": 0.0013502332149073482, + "loss_iou": 0.248046875, + "loss_num": 0.049072265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 71489608, + "step": 1141 + }, + { + "epoch": 3.8003327787021632, + "grad_norm": 7.645864486694336, + "learning_rate": 5e-06, + "loss": 0.6064, + "num_input_tokens_seen": 71551324, + "step": 1142 + }, + { + "epoch": 3.8003327787021632, + "loss": 0.48682481050491333, + "loss_ce": 0.00025257206289097667, + "loss_iou": 0.1767578125, + "loss_num": 0.0267333984375, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 71551324, + "step": 1142 + }, + { + "epoch": 3.803660565723794, + "grad_norm": 11.24929141998291, + "learning_rate": 5e-06, + "loss": 0.6623, + "num_input_tokens_seen": 71614812, + "step": 1143 + }, + { + "epoch": 3.803660565723794, + "loss": 0.5440496206283569, + "loss_ce": 4.3287254811730236e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.028076171875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 71614812, + "step": 1143 + }, + { + "epoch": 3.8069883527454245, + "grad_norm": 14.913579940795898, + "learning_rate": 5e-06, + "loss": 1.1182, + "num_input_tokens_seen": 71678540, + "step": 1144 + }, + { + "epoch": 3.8069883527454245, + "loss": 0.9577356576919556, + "loss_ce": 0.0007043948862701654, + "loss_iou": 0.33203125, + "loss_num": 0.058349609375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 71678540, + "step": 1144 + }, + { + "epoch": 3.810316139767055, + "grad_norm": 9.077905654907227, + "learning_rate": 5e-06, + "loss": 0.6147, + "num_input_tokens_seen": 71739620, + "step": 1145 + }, + { + "epoch": 3.810316139767055, + "loss": 0.5191451907157898, + "loss_ce": 4.1206356399925426e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.049560546875, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 71739620, + "step": 1145 + }, + { + "epoch": 3.8136439267886857, + "grad_norm": 19.119752883911133, + "learning_rate": 5e-06, + "loss": 0.6505, + "num_input_tokens_seen": 71801800, + "step": 1146 + }, + { + "epoch": 3.8136439267886857, + "loss": 0.6102641820907593, + "loss_ce": 0.0006450038054026663, + "loss_iou": 0.1884765625, + "loss_num": 0.04638671875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 71801800, + "step": 1146 + }, + { + "epoch": 3.8169717138103163, + "grad_norm": 24.023101806640625, + "learning_rate": 5e-06, + "loss": 0.6411, + "num_input_tokens_seen": 71866032, + "step": 1147 + }, + { + "epoch": 3.8169717138103163, + "loss": 0.600353479385376, + "loss_ce": 1.1719953363353852e-05, + "loss_iou": 0.2265625, + "loss_num": 0.0294189453125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 71866032, + "step": 1147 + }, + { + "epoch": 3.820299500831947, + "grad_norm": 11.998858451843262, + "learning_rate": 5e-06, + "loss": 0.7952, + "num_input_tokens_seen": 71926820, + "step": 1148 + }, + { + "epoch": 3.820299500831947, + "loss": 0.8667431473731995, + "loss_ce": 0.001142566092312336, + "loss_iou": 0.310546875, + "loss_num": 0.048583984375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 71926820, + "step": 1148 + }, + { + "epoch": 3.8236272878535775, + "grad_norm": 23.826696395874023, + "learning_rate": 5e-06, + "loss": 0.6687, + "num_input_tokens_seen": 71989184, + "step": 1149 + }, + { + "epoch": 3.8236272878535775, + "loss": 0.6472935080528259, + "loss_ce": 0.0059361206367611885, + "loss_iou": 0.1533203125, + "loss_num": 0.0673828125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 71989184, + "step": 1149 + }, + { + "epoch": 3.826955074875208, + "grad_norm": 15.457709312438965, + "learning_rate": 5e-06, + "loss": 0.6533, + "num_input_tokens_seen": 72052768, + "step": 1150 + }, + { + "epoch": 3.826955074875208, + "loss": 0.8420194983482361, + "loss_ce": 0.0013212742051109672, + "loss_iou": 0.30078125, + "loss_num": 0.04736328125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 72052768, + "step": 1150 + }, + { + "epoch": 3.8302828618968388, + "grad_norm": 11.85561466217041, + "learning_rate": 5e-06, + "loss": 0.5567, + "num_input_tokens_seen": 72115176, + "step": 1151 + }, + { + "epoch": 3.8302828618968388, + "loss": 0.491475909948349, + "loss_ce": 0.00014292128616943955, + "loss_iou": 0.1455078125, + "loss_num": 0.0400390625, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 72115176, + "step": 1151 + }, + { + "epoch": 3.8336106489184694, + "grad_norm": 14.689626693725586, + "learning_rate": 5e-06, + "loss": 0.7026, + "num_input_tokens_seen": 72176716, + "step": 1152 + }, + { + "epoch": 3.8336106489184694, + "loss": 0.9168117046356201, + "loss_ce": 0.00030778077780269086, + "loss_iou": 0.310546875, + "loss_num": 0.05908203125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 72176716, + "step": 1152 + }, + { + "epoch": 3.8369384359401, + "grad_norm": 13.6842679977417, + "learning_rate": 5e-06, + "loss": 0.9622, + "num_input_tokens_seen": 72239940, + "step": 1153 + }, + { + "epoch": 3.8369384359401, + "loss": 0.9152591824531555, + "loss_ce": 0.0009526039939373732, + "loss_iou": 0.314453125, + "loss_num": 0.057373046875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 72239940, + "step": 1153 + }, + { + "epoch": 3.8402662229617306, + "grad_norm": 11.269613265991211, + "learning_rate": 5e-06, + "loss": 0.7609, + "num_input_tokens_seen": 72301720, + "step": 1154 + }, + { + "epoch": 3.8402662229617306, + "loss": 0.6302889585494995, + "loss_ce": 0.001016515656374395, + "loss_iou": 0.22265625, + "loss_num": 0.037109375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 72301720, + "step": 1154 + }, + { + "epoch": 3.843594009983361, + "grad_norm": 21.826889038085938, + "learning_rate": 5e-06, + "loss": 1.0018, + "num_input_tokens_seen": 72363092, + "step": 1155 + }, + { + "epoch": 3.843594009983361, + "loss": 1.0488756895065308, + "loss_ce": 0.0009020923171192408, + "loss_iou": 0.34375, + "loss_num": 0.072265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 72363092, + "step": 1155 + }, + { + "epoch": 3.846921797004992, + "grad_norm": 27.468761444091797, + "learning_rate": 5e-06, + "loss": 0.9151, + "num_input_tokens_seen": 72427664, + "step": 1156 + }, + { + "epoch": 3.846921797004992, + "loss": 0.9939867258071899, + "loss_ce": 0.0002123451413353905, + "loss_iou": 0.38671875, + "loss_num": 0.0439453125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 72427664, + "step": 1156 + }, + { + "epoch": 3.8502495840266224, + "grad_norm": 20.527379989624023, + "learning_rate": 5e-06, + "loss": 0.9454, + "num_input_tokens_seen": 72492032, + "step": 1157 + }, + { + "epoch": 3.8502495840266224, + "loss": 0.8400707244873047, + "loss_ce": 0.0010204899590462446, + "loss_iou": 0.279296875, + "loss_num": 0.055908203125, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 72492032, + "step": 1157 + }, + { + "epoch": 3.853577371048253, + "grad_norm": 8.549810409545898, + "learning_rate": 5e-06, + "loss": 0.7699, + "num_input_tokens_seen": 72554224, + "step": 1158 + }, + { + "epoch": 3.853577371048253, + "loss": 0.627190351486206, + "loss_ce": 0.0021292981691658497, + "loss_iou": 0.173828125, + "loss_num": 0.055419921875, + "loss_xval": 0.625, + "num_input_tokens_seen": 72554224, + "step": 1158 + }, + { + "epoch": 3.8569051580698837, + "grad_norm": 21.32713508605957, + "learning_rate": 5e-06, + "loss": 0.6667, + "num_input_tokens_seen": 72617400, + "step": 1159 + }, + { + "epoch": 3.8569051580698837, + "loss": 0.8027868270874023, + "loss_ce": 5.2467978093773127e-05, + "loss_iou": 0.265625, + "loss_num": 0.05419921875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 72617400, + "step": 1159 + }, + { + "epoch": 3.8602329450915143, + "grad_norm": 12.14395809173584, + "learning_rate": 5e-06, + "loss": 0.7003, + "num_input_tokens_seen": 72679172, + "step": 1160 + }, + { + "epoch": 3.8602329450915143, + "loss": 0.6025677919387817, + "loss_ce": 2.867916555260308e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 72679172, + "step": 1160 + }, + { + "epoch": 3.863560732113145, + "grad_norm": 19.726367950439453, + "learning_rate": 5e-06, + "loss": 0.6465, + "num_input_tokens_seen": 72742260, + "step": 1161 + }, + { + "epoch": 3.863560732113145, + "loss": 0.6337836980819702, + "loss_ce": 0.00011673450353555381, + "loss_iou": 0.2294921875, + "loss_num": 0.034912109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 72742260, + "step": 1161 + }, + { + "epoch": 3.8668885191347755, + "grad_norm": 15.199066162109375, + "learning_rate": 5e-06, + "loss": 0.6762, + "num_input_tokens_seen": 72803396, + "step": 1162 + }, + { + "epoch": 3.8668885191347755, + "loss": 0.8257670998573303, + "loss_ce": 8.353222074219957e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0537109375, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 72803396, + "step": 1162 + }, + { + "epoch": 3.870216306156406, + "grad_norm": 27.860286712646484, + "learning_rate": 5e-06, + "loss": 0.7779, + "num_input_tokens_seen": 72865844, + "step": 1163 + }, + { + "epoch": 3.870216306156406, + "loss": 0.9027190208435059, + "loss_ce": 0.0006194374873302877, + "loss_iou": 0.275390625, + "loss_num": 0.07080078125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 72865844, + "step": 1163 + }, + { + "epoch": 3.8735440931780367, + "grad_norm": 22.976346969604492, + "learning_rate": 5e-06, + "loss": 0.8613, + "num_input_tokens_seen": 72926168, + "step": 1164 + }, + { + "epoch": 3.8735440931780367, + "loss": 1.036806583404541, + "loss_ce": 0.0006737524527125061, + "loss_iou": 0.380859375, + "loss_num": 0.054931640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 72926168, + "step": 1164 + }, + { + "epoch": 3.8768718801996673, + "grad_norm": 8.25428581237793, + "learning_rate": 5e-06, + "loss": 0.7253, + "num_input_tokens_seen": 72987380, + "step": 1165 + }, + { + "epoch": 3.8768718801996673, + "loss": 0.6392136216163635, + "loss_ce": 5.344775854609907e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.0390625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 72987380, + "step": 1165 + }, + { + "epoch": 3.880199667221298, + "grad_norm": 16.900022506713867, + "learning_rate": 5e-06, + "loss": 0.7989, + "num_input_tokens_seen": 73051392, + "step": 1166 + }, + { + "epoch": 3.880199667221298, + "loss": 0.7662708759307861, + "loss_ce": 0.0011341598583385348, + "loss_iou": 0.26953125, + "loss_num": 0.044921875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 73051392, + "step": 1166 + }, + { + "epoch": 3.8835274542429286, + "grad_norm": 14.377167701721191, + "learning_rate": 5e-06, + "loss": 0.763, + "num_input_tokens_seen": 73113100, + "step": 1167 + }, + { + "epoch": 3.8835274542429286, + "loss": 0.5086641311645508, + "loss_ce": 0.00024124824267346412, + "loss_iou": 0.185546875, + "loss_num": 0.0274658203125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 73113100, + "step": 1167 + }, + { + "epoch": 3.886855241264559, + "grad_norm": 9.697162628173828, + "learning_rate": 5e-06, + "loss": 0.8776, + "num_input_tokens_seen": 73176080, + "step": 1168 + }, + { + "epoch": 3.886855241264559, + "loss": 0.7365698218345642, + "loss_ce": 0.0007299709832295775, + "loss_iou": 0.26171875, + "loss_num": 0.04296875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 73176080, + "step": 1168 + }, + { + "epoch": 3.89018302828619, + "grad_norm": 7.753446578979492, + "learning_rate": 5e-06, + "loss": 0.6761, + "num_input_tokens_seen": 73239540, + "step": 1169 + }, + { + "epoch": 3.89018302828619, + "loss": 0.8952956199645996, + "loss_ce": 0.0007643617573194206, + "loss_iou": 0.32421875, + "loss_num": 0.049072265625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 73239540, + "step": 1169 + }, + { + "epoch": 3.8935108153078204, + "grad_norm": 38.60930633544922, + "learning_rate": 5e-06, + "loss": 0.9386, + "num_input_tokens_seen": 73301948, + "step": 1170 + }, + { + "epoch": 3.8935108153078204, + "loss": 0.8330338001251221, + "loss_ce": 0.0005142604932188988, + "loss_iou": 0.26171875, + "loss_num": 0.0615234375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 73301948, + "step": 1170 + }, + { + "epoch": 3.896838602329451, + "grad_norm": 44.411895751953125, + "learning_rate": 5e-06, + "loss": 0.877, + "num_input_tokens_seen": 73364536, + "step": 1171 + }, + { + "epoch": 3.896838602329451, + "loss": 0.9410591125488281, + "loss_ce": 0.0005683889612555504, + "loss_iou": 0.3046875, + "loss_num": 0.06591796875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 73364536, + "step": 1171 + }, + { + "epoch": 3.9001663893510816, + "grad_norm": 15.136682510375977, + "learning_rate": 5e-06, + "loss": 0.5564, + "num_input_tokens_seen": 73426144, + "step": 1172 + }, + { + "epoch": 3.9001663893510816, + "loss": 0.4998212456703186, + "loss_ce": 0.00012645090464502573, + "loss_iou": 0.166015625, + "loss_num": 0.033447265625, + "loss_xval": 0.5, + "num_input_tokens_seen": 73426144, + "step": 1172 + }, + { + "epoch": 3.9034941763727122, + "grad_norm": 31.158117294311523, + "learning_rate": 5e-06, + "loss": 0.6874, + "num_input_tokens_seen": 73488980, + "step": 1173 + }, + { + "epoch": 3.9034941763727122, + "loss": 0.6511569023132324, + "loss_ce": 3.384947558515705e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0255126953125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 73488980, + "step": 1173 + }, + { + "epoch": 3.906821963394343, + "grad_norm": 18.398252487182617, + "learning_rate": 5e-06, + "loss": 0.7025, + "num_input_tokens_seen": 73550568, + "step": 1174 + }, + { + "epoch": 3.906821963394343, + "loss": 0.8032513856887817, + "loss_ce": 0.0012493999674916267, + "loss_iou": 0.2578125, + "loss_num": 0.05712890625, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 73550568, + "step": 1174 + }, + { + "epoch": 3.9101497504159735, + "grad_norm": 14.897561073303223, + "learning_rate": 5e-06, + "loss": 0.6192, + "num_input_tokens_seen": 73614048, + "step": 1175 + }, + { + "epoch": 3.9101497504159735, + "loss": 0.6096196174621582, + "loss_ce": 0.000488759484142065, + "loss_iou": 0.1767578125, + "loss_num": 0.051025390625, + "loss_xval": 0.609375, + "num_input_tokens_seen": 73614048, + "step": 1175 + }, + { + "epoch": 3.913477537437604, + "grad_norm": 12.411521911621094, + "learning_rate": 5e-06, + "loss": 0.7318, + "num_input_tokens_seen": 73677156, + "step": 1176 + }, + { + "epoch": 3.913477537437604, + "loss": 0.7434451580047607, + "loss_ce": 0.0008914528880268335, + "loss_iou": 0.265625, + "loss_num": 0.04248046875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 73677156, + "step": 1176 + }, + { + "epoch": 3.9168053244592347, + "grad_norm": 12.991275787353516, + "learning_rate": 5e-06, + "loss": 0.7032, + "num_input_tokens_seen": 73739192, + "step": 1177 + }, + { + "epoch": 3.9168053244592347, + "loss": 0.7536220550537109, + "loss_ce": 0.00020409880380611867, + "loss_iou": 0.287109375, + "loss_num": 0.035400390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 73739192, + "step": 1177 + }, + { + "epoch": 3.9201331114808653, + "grad_norm": 12.387129783630371, + "learning_rate": 5e-06, + "loss": 0.4625, + "num_input_tokens_seen": 73801340, + "step": 1178 + }, + { + "epoch": 3.9201331114808653, + "loss": 0.4774191081523895, + "loss_ce": 0.0004903915687464178, + "loss_iou": 0.10791015625, + "loss_num": 0.05224609375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 73801340, + "step": 1178 + }, + { + "epoch": 3.923460898502496, + "grad_norm": 13.816216468811035, + "learning_rate": 5e-06, + "loss": 0.5731, + "num_input_tokens_seen": 73863168, + "step": 1179 + }, + { + "epoch": 3.923460898502496, + "loss": 0.4799365699291229, + "loss_ce": 1.7145994206657633e-05, + "loss_iou": 0.177734375, + "loss_num": 0.02490234375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 73863168, + "step": 1179 + }, + { + "epoch": 3.9267886855241265, + "grad_norm": 13.6268310546875, + "learning_rate": 5e-06, + "loss": 0.5817, + "num_input_tokens_seen": 73925652, + "step": 1180 + }, + { + "epoch": 3.9267886855241265, + "loss": 0.46266400814056396, + "loss_ce": 1.7535534425405785e-05, + "loss_iou": 0.1494140625, + "loss_num": 0.03271484375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 73925652, + "step": 1180 + }, + { + "epoch": 3.930116472545757, + "grad_norm": 10.935044288635254, + "learning_rate": 5e-06, + "loss": 0.7337, + "num_input_tokens_seen": 73988132, + "step": 1181 + }, + { + "epoch": 3.930116472545757, + "loss": 0.714240312576294, + "loss_ce": 0.000251059333095327, + "loss_iou": 0.201171875, + "loss_num": 0.0625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 73988132, + "step": 1181 + }, + { + "epoch": 3.9334442595673877, + "grad_norm": 21.19894790649414, + "learning_rate": 5e-06, + "loss": 0.7096, + "num_input_tokens_seen": 74051356, + "step": 1182 + }, + { + "epoch": 3.9334442595673877, + "loss": 0.7160288691520691, + "loss_ce": 0.00020855304319411516, + "loss_iou": 0.220703125, + "loss_num": 0.054931640625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 74051356, + "step": 1182 + }, + { + "epoch": 3.9367720465890184, + "grad_norm": 22.308738708496094, + "learning_rate": 5e-06, + "loss": 1.0778, + "num_input_tokens_seen": 74115356, + "step": 1183 + }, + { + "epoch": 3.9367720465890184, + "loss": 1.0566372871398926, + "loss_ce": 0.0003629096318036318, + "loss_iou": 0.349609375, + "loss_num": 0.0712890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 74115356, + "step": 1183 + }, + { + "epoch": 3.940099833610649, + "grad_norm": 12.43722152709961, + "learning_rate": 5e-06, + "loss": 0.7557, + "num_input_tokens_seen": 74178668, + "step": 1184 + }, + { + "epoch": 3.940099833610649, + "loss": 0.5400795936584473, + "loss_ce": 4.0542239730712026e-05, + "loss_iou": 0.2021484375, + "loss_num": 0.027099609375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 74178668, + "step": 1184 + }, + { + "epoch": 3.9434276206322796, + "grad_norm": 12.8514404296875, + "learning_rate": 5e-06, + "loss": 0.706, + "num_input_tokens_seen": 74240964, + "step": 1185 + }, + { + "epoch": 3.9434276206322796, + "loss": 0.8476996421813965, + "loss_ce": 0.0007758617866784334, + "loss_iou": 0.2578125, + "loss_num": 0.06591796875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 74240964, + "step": 1185 + }, + { + "epoch": 3.94675540765391, + "grad_norm": 17.675451278686523, + "learning_rate": 5e-06, + "loss": 0.8197, + "num_input_tokens_seen": 74304024, + "step": 1186 + }, + { + "epoch": 3.94675540765391, + "loss": 0.9300236701965332, + "loss_ce": 9.207585389958695e-05, + "loss_iou": 0.337890625, + "loss_num": 0.05078125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 74304024, + "step": 1186 + }, + { + "epoch": 3.950083194675541, + "grad_norm": 8.358492851257324, + "learning_rate": 5e-06, + "loss": 0.7479, + "num_input_tokens_seen": 74367408, + "step": 1187 + }, + { + "epoch": 3.950083194675541, + "loss": 0.6704340577125549, + "loss_ce": 0.0016108033014461398, + "loss_iou": 0.220703125, + "loss_num": 0.045166015625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 74367408, + "step": 1187 + }, + { + "epoch": 3.9534109816971714, + "grad_norm": 10.79404067993164, + "learning_rate": 5e-06, + "loss": 0.7976, + "num_input_tokens_seen": 74430972, + "step": 1188 + }, + { + "epoch": 3.9534109816971714, + "loss": 0.8109077215194702, + "loss_ce": 0.0008491338812746108, + "loss_iou": 0.3046875, + "loss_num": 0.040283203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 74430972, + "step": 1188 + }, + { + "epoch": 3.956738768718802, + "grad_norm": 10.02695083618164, + "learning_rate": 5e-06, + "loss": 0.6818, + "num_input_tokens_seen": 74493320, + "step": 1189 + }, + { + "epoch": 3.956738768718802, + "loss": 0.6926209330558777, + "loss_ce": 0.0002991709334310144, + "loss_iou": 0.23828125, + "loss_num": 0.043212890625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 74493320, + "step": 1189 + }, + { + "epoch": 3.9600665557404326, + "grad_norm": 15.090518951416016, + "learning_rate": 5e-06, + "loss": 0.6759, + "num_input_tokens_seen": 74556176, + "step": 1190 + }, + { + "epoch": 3.9600665557404326, + "loss": 0.37127041816711426, + "loss_ce": 0.0005428859149105847, + "loss_iou": 0.10888671875, + "loss_num": 0.0303955078125, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 74556176, + "step": 1190 + }, + { + "epoch": 3.9633943427620633, + "grad_norm": 15.567995071411133, + "learning_rate": 5e-06, + "loss": 0.8636, + "num_input_tokens_seen": 74619904, + "step": 1191 + }, + { + "epoch": 3.9633943427620633, + "loss": 1.1187453269958496, + "loss_ce": 0.001313702785409987, + "loss_iou": 0.421875, + "loss_num": 0.05517578125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 74619904, + "step": 1191 + }, + { + "epoch": 3.966722129783694, + "grad_norm": 20.189136505126953, + "learning_rate": 5e-06, + "loss": 0.899, + "num_input_tokens_seen": 74683056, + "step": 1192 + }, + { + "epoch": 3.966722129783694, + "loss": 0.6560426950454712, + "loss_ce": 0.002234060550108552, + "loss_iou": 0.1826171875, + "loss_num": 0.057373046875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 74683056, + "step": 1192 + }, + { + "epoch": 3.9700499168053245, + "grad_norm": 15.30156135559082, + "learning_rate": 5e-06, + "loss": 0.5481, + "num_input_tokens_seen": 74744020, + "step": 1193 + }, + { + "epoch": 3.9700499168053245, + "loss": 0.45218271017074585, + "loss_ce": 3.4305761801078916e-05, + "loss_iou": 0.134765625, + "loss_num": 0.03662109375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 74744020, + "step": 1193 + }, + { + "epoch": 3.973377703826955, + "grad_norm": 18.261381149291992, + "learning_rate": 5e-06, + "loss": 0.6689, + "num_input_tokens_seen": 74806148, + "step": 1194 + }, + { + "epoch": 3.973377703826955, + "loss": 0.6880115866661072, + "loss_ce": 2.3293252525036223e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.05517578125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 74806148, + "step": 1194 + }, + { + "epoch": 3.9767054908485857, + "grad_norm": 35.95396423339844, + "learning_rate": 5e-06, + "loss": 0.9737, + "num_input_tokens_seen": 74870276, + "step": 1195 + }, + { + "epoch": 3.9767054908485857, + "loss": 1.2164342403411865, + "loss_ce": 0.003543566446751356, + "loss_iou": 0.408203125, + "loss_num": 0.07958984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 74870276, + "step": 1195 + }, + { + "epoch": 3.9800332778702163, + "grad_norm": 15.76313304901123, + "learning_rate": 5e-06, + "loss": 0.7284, + "num_input_tokens_seen": 74933660, + "step": 1196 + }, + { + "epoch": 3.9800332778702163, + "loss": 0.8802682161331177, + "loss_ce": 0.0006294772610999644, + "loss_iou": 0.2578125, + "loss_num": 0.07275390625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 74933660, + "step": 1196 + }, + { + "epoch": 3.983361064891847, + "grad_norm": 8.434407234191895, + "learning_rate": 5e-06, + "loss": 0.6092, + "num_input_tokens_seen": 74997052, + "step": 1197 + }, + { + "epoch": 3.983361064891847, + "loss": 0.511523962020874, + "loss_ce": 0.0004155955102760345, + "loss_iou": 0.1650390625, + "loss_num": 0.0361328125, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 74997052, + "step": 1197 + }, + { + "epoch": 3.9866888519134775, + "grad_norm": 10.437862396240234, + "learning_rate": 5e-06, + "loss": 0.7356, + "num_input_tokens_seen": 75058944, + "step": 1198 + }, + { + "epoch": 3.9866888519134775, + "loss": 0.9156403541564941, + "loss_ce": 0.0020660818554461002, + "loss_iou": 0.294921875, + "loss_num": 0.06396484375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 75058944, + "step": 1198 + }, + { + "epoch": 3.990016638935108, + "grad_norm": 22.736345291137695, + "learning_rate": 5e-06, + "loss": 0.601, + "num_input_tokens_seen": 75120440, + "step": 1199 + }, + { + "epoch": 3.990016638935108, + "loss": 0.6733030080795288, + "loss_ce": 0.0006955533754080534, + "loss_iou": 0.2314453125, + "loss_num": 0.0419921875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 75120440, + "step": 1199 + }, + { + "epoch": 3.9933444259567388, + "grad_norm": 19.93775177001953, + "learning_rate": 5e-06, + "loss": 0.5766, + "num_input_tokens_seen": 75180068, + "step": 1200 + }, + { + "epoch": 3.9933444259567388, + "loss": 0.502943754196167, + "loss_ce": 1.4059570276003797e-05, + "loss_iou": 0.1240234375, + "loss_num": 0.051025390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 75180068, + "step": 1200 + }, + { + "epoch": 3.9966722129783694, + "grad_norm": 7.942922592163086, + "learning_rate": 5e-06, + "loss": 0.6018, + "num_input_tokens_seen": 75242588, + "step": 1201 + }, + { + "epoch": 3.9966722129783694, + "loss": 0.5372599363327026, + "loss_ce": 0.0003947424702346325, + "loss_iou": 0.2041015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 75242588, + "step": 1201 + }, + { + "epoch": 4.0, + "grad_norm": 15.248062133789062, + "learning_rate": 5e-06, + "loss": 0.6613, + "num_input_tokens_seen": 75304732, + "step": 1202 + }, + { + "epoch": 4.0, + "loss": 0.7052999138832092, + "loss_ce": 0.000648995628580451, + "loss_iou": 0.177734375, + "loss_num": 0.06982421875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 75304732, + "step": 1202 + }, + { + "epoch": 4.003327787021631, + "grad_norm": 11.188032150268555, + "learning_rate": 5e-06, + "loss": 0.7275, + "num_input_tokens_seen": 75366540, + "step": 1203 + }, + { + "epoch": 4.003327787021631, + "loss": 0.7967070937156677, + "loss_ce": 0.0008086481248028576, + "loss_iou": 0.298828125, + "loss_num": 0.039794921875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 75366540, + "step": 1203 + }, + { + "epoch": 4.006655574043261, + "grad_norm": 5.265565872192383, + "learning_rate": 5e-06, + "loss": 0.3553, + "num_input_tokens_seen": 75427768, + "step": 1204 + }, + { + "epoch": 4.006655574043261, + "loss": 0.2486070692539215, + "loss_ce": 1.0882431524805725e-05, + "loss_iou": 0.0, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 75427768, + "step": 1204 + }, + { + "epoch": 4.009983361064892, + "grad_norm": 73.91722106933594, + "learning_rate": 5e-06, + "loss": 0.7568, + "num_input_tokens_seen": 75491840, + "step": 1205 + }, + { + "epoch": 4.009983361064892, + "loss": 0.8017033338546753, + "loss_ce": 0.000922084494959563, + "loss_iou": 0.279296875, + "loss_num": 0.04833984375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 75491840, + "step": 1205 + }, + { + "epoch": 4.0133111480865225, + "grad_norm": 19.91536521911621, + "learning_rate": 5e-06, + "loss": 0.5606, + "num_input_tokens_seen": 75553160, + "step": 1206 + }, + { + "epoch": 4.0133111480865225, + "loss": 0.5015987157821655, + "loss_ce": 0.00013387855142354965, + "loss_iou": 0.1376953125, + "loss_num": 0.045166015625, + "loss_xval": 0.5, + "num_input_tokens_seen": 75553160, + "step": 1206 + }, + { + "epoch": 4.016638935108153, + "grad_norm": 12.133269309997559, + "learning_rate": 5e-06, + "loss": 0.8538, + "num_input_tokens_seen": 75618412, + "step": 1207 + }, + { + "epoch": 4.016638935108153, + "loss": 0.9982300996780396, + "loss_ce": 0.0014039267553016543, + "loss_iou": 0.365234375, + "loss_num": 0.0537109375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 75618412, + "step": 1207 + }, + { + "epoch": 4.019966722129784, + "grad_norm": 8.875154495239258, + "learning_rate": 5e-06, + "loss": 0.775, + "num_input_tokens_seen": 75682372, + "step": 1208 + }, + { + "epoch": 4.019966722129784, + "loss": 0.8194925785064697, + "loss_ce": 0.0001566612918395549, + "loss_iou": 0.2890625, + "loss_num": 0.04833984375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 75682372, + "step": 1208 + }, + { + "epoch": 4.023294509151414, + "grad_norm": 10.672651290893555, + "learning_rate": 5e-06, + "loss": 0.4433, + "num_input_tokens_seen": 75742552, + "step": 1209 + }, + { + "epoch": 4.023294509151414, + "loss": 0.3726426362991333, + "loss_ce": 2.2998472559265792e-05, + "loss_iou": 0.05029296875, + "loss_num": 0.054443359375, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 75742552, + "step": 1209 + }, + { + "epoch": 4.026622296173045, + "grad_norm": 31.792285919189453, + "learning_rate": 5e-06, + "loss": 0.7017, + "num_input_tokens_seen": 75804336, + "step": 1210 + }, + { + "epoch": 4.026622296173045, + "loss": 0.7170919179916382, + "loss_ce": 0.0002950564958155155, + "loss_iou": 0.2451171875, + "loss_num": 0.045166015625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 75804336, + "step": 1210 + }, + { + "epoch": 4.0299500831946755, + "grad_norm": 14.573902130126953, + "learning_rate": 5e-06, + "loss": 0.6705, + "num_input_tokens_seen": 75865872, + "step": 1211 + }, + { + "epoch": 4.0299500831946755, + "loss": 0.7652605175971985, + "loss_ce": 0.00030688103288412094, + "loss_iou": 0.2314453125, + "loss_num": 0.060302734375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 75865872, + "step": 1211 + }, + { + "epoch": 4.033277870216306, + "grad_norm": 17.535743713378906, + "learning_rate": 5e-06, + "loss": 0.6471, + "num_input_tokens_seen": 75930268, + "step": 1212 + }, + { + "epoch": 4.033277870216306, + "loss": 0.7741683721542358, + "loss_ce": 0.0007308665663003922, + "loss_iou": 0.2890625, + "loss_num": 0.039306640625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 75930268, + "step": 1212 + }, + { + "epoch": 4.036605657237937, + "grad_norm": 14.236082077026367, + "learning_rate": 5e-06, + "loss": 0.9975, + "num_input_tokens_seen": 75994488, + "step": 1213 + }, + { + "epoch": 4.036605657237937, + "loss": 1.1918745040893555, + "loss_ce": 0.0007124332478269935, + "loss_iou": 0.404296875, + "loss_num": 0.07666015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 75994488, + "step": 1213 + }, + { + "epoch": 4.039933444259567, + "grad_norm": 12.264684677124023, + "learning_rate": 5e-06, + "loss": 0.8542, + "num_input_tokens_seen": 76057384, + "step": 1214 + }, + { + "epoch": 4.039933444259567, + "loss": 1.0039153099060059, + "loss_ce": 0.0009246146655641496, + "loss_iou": 0.345703125, + "loss_num": 0.062255859375, + "loss_xval": 1.0, + "num_input_tokens_seen": 76057384, + "step": 1214 + }, + { + "epoch": 4.043261231281198, + "grad_norm": 11.338297843933105, + "learning_rate": 5e-06, + "loss": 0.6555, + "num_input_tokens_seen": 76120200, + "step": 1215 + }, + { + "epoch": 4.043261231281198, + "loss": 0.6709122061729431, + "loss_ce": 1.3811413737130351e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.038330078125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 76120200, + "step": 1215 + }, + { + "epoch": 4.046589018302829, + "grad_norm": 11.017167091369629, + "learning_rate": 5e-06, + "loss": 0.759, + "num_input_tokens_seen": 76184652, + "step": 1216 + }, + { + "epoch": 4.046589018302829, + "loss": 0.8067546486854553, + "loss_ce": 0.0002360953949391842, + "loss_iou": 0.294921875, + "loss_num": 0.04296875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 76184652, + "step": 1216 + }, + { + "epoch": 4.049916805324459, + "grad_norm": 9.538055419921875, + "learning_rate": 5e-06, + "loss": 0.659, + "num_input_tokens_seen": 76248504, + "step": 1217 + }, + { + "epoch": 4.049916805324459, + "loss": 0.6807469129562378, + "loss_ce": 0.00020490906899794936, + "loss_iou": 0.265625, + "loss_num": 0.0299072265625, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 76248504, + "step": 1217 + }, + { + "epoch": 4.05324459234609, + "grad_norm": 13.316681861877441, + "learning_rate": 5e-06, + "loss": 0.7569, + "num_input_tokens_seen": 76312336, + "step": 1218 + }, + { + "epoch": 4.05324459234609, + "loss": 0.6233140230178833, + "loss_ce": 0.00014506097068078816, + "loss_iou": 0.2275390625, + "loss_num": 0.033447265625, + "loss_xval": 0.625, + "num_input_tokens_seen": 76312336, + "step": 1218 + }, + { + "epoch": 4.05657237936772, + "grad_norm": 18.77733039855957, + "learning_rate": 5e-06, + "loss": 0.7825, + "num_input_tokens_seen": 76377484, + "step": 1219 + }, + { + "epoch": 4.05657237936772, + "loss": 0.8869447708129883, + "loss_ce": 0.001202607061713934, + "loss_iou": 0.33984375, + "loss_num": 0.041015625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 76377484, + "step": 1219 + }, + { + "epoch": 4.059900166389351, + "grad_norm": 13.015058517456055, + "learning_rate": 5e-06, + "loss": 0.8573, + "num_input_tokens_seen": 76439780, + "step": 1220 + }, + { + "epoch": 4.059900166389351, + "loss": 0.7998440265655518, + "loss_ce": 0.0010159012163057923, + "loss_iou": 0.291015625, + "loss_num": 0.04345703125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 76439780, + "step": 1220 + }, + { + "epoch": 4.063227953410982, + "grad_norm": 10.296722412109375, + "learning_rate": 5e-06, + "loss": 1.0222, + "num_input_tokens_seen": 76500992, + "step": 1221 + }, + { + "epoch": 4.063227953410982, + "loss": 0.8199893236160278, + "loss_ce": 0.0005923279095441103, + "loss_iou": 0.25390625, + "loss_num": 0.062255859375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 76500992, + "step": 1221 + }, + { + "epoch": 4.066555740432612, + "grad_norm": 16.238292694091797, + "learning_rate": 5e-06, + "loss": 0.971, + "num_input_tokens_seen": 76564108, + "step": 1222 + }, + { + "epoch": 4.066555740432612, + "loss": 1.0141180753707886, + "loss_ce": 0.0008124337764456868, + "loss_iou": 0.3984375, + "loss_num": 0.04345703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 76564108, + "step": 1222 + }, + { + "epoch": 4.069883527454243, + "grad_norm": 15.772029876708984, + "learning_rate": 5e-06, + "loss": 0.804, + "num_input_tokens_seen": 76627116, + "step": 1223 + }, + { + "epoch": 4.069883527454243, + "loss": 0.8473187685012817, + "loss_ce": 2.8751601348631084e-05, + "loss_iou": 0.31640625, + "loss_num": 0.042724609375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 76627116, + "step": 1223 + }, + { + "epoch": 4.0732113144758735, + "grad_norm": 21.51706314086914, + "learning_rate": 5e-06, + "loss": 0.757, + "num_input_tokens_seen": 76688968, + "step": 1224 + }, + { + "epoch": 4.0732113144758735, + "loss": 0.6889785528182983, + "loss_ce": 1.3671827218786348e-05, + "loss_iou": 0.2578125, + "loss_num": 0.03466796875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 76688968, + "step": 1224 + }, + { + "epoch": 4.076539101497504, + "grad_norm": 13.671257019042969, + "learning_rate": 5e-06, + "loss": 0.6394, + "num_input_tokens_seen": 76752252, + "step": 1225 + }, + { + "epoch": 4.076539101497504, + "loss": 0.6889891624450684, + "loss_ce": 0.00026844226522371173, + "loss_iou": 0.275390625, + "loss_num": 0.02734375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 76752252, + "step": 1225 + }, + { + "epoch": 4.079866888519135, + "grad_norm": 12.221083641052246, + "learning_rate": 5e-06, + "loss": 0.5801, + "num_input_tokens_seen": 76814152, + "step": 1226 + }, + { + "epoch": 4.079866888519135, + "loss": 0.5570855736732483, + "loss_ce": 0.0003839263808913529, + "loss_iou": 0.19921875, + "loss_num": 0.03173828125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 76814152, + "step": 1226 + }, + { + "epoch": 4.083194675540765, + "grad_norm": 20.396621704101562, + "learning_rate": 5e-06, + "loss": 0.773, + "num_input_tokens_seen": 76877564, + "step": 1227 + }, + { + "epoch": 4.083194675540765, + "loss": 0.6958671808242798, + "loss_ce": 6.636592297581956e-05, + "loss_iou": 0.234375, + "loss_num": 0.044921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 76877564, + "step": 1227 + }, + { + "epoch": 4.086522462562396, + "grad_norm": 21.930482864379883, + "learning_rate": 5e-06, + "loss": 0.9511, + "num_input_tokens_seen": 76943260, + "step": 1228 + }, + { + "epoch": 4.086522462562396, + "loss": 1.138702630996704, + "loss_ce": 0.0022280181292444468, + "loss_iou": 0.44140625, + "loss_num": 0.051025390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 76943260, + "step": 1228 + }, + { + "epoch": 4.0898502495840265, + "grad_norm": 10.00102424621582, + "learning_rate": 5e-06, + "loss": 0.6424, + "num_input_tokens_seen": 77005432, + "step": 1229 + }, + { + "epoch": 4.0898502495840265, + "loss": 0.47109830379486084, + "loss_ce": 0.0005172473029233515, + "loss_iou": 0.126953125, + "loss_num": 0.04345703125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 77005432, + "step": 1229 + }, + { + "epoch": 4.093178036605657, + "grad_norm": 15.279685020446777, + "learning_rate": 5e-06, + "loss": 0.8382, + "num_input_tokens_seen": 77067552, + "step": 1230 + }, + { + "epoch": 4.093178036605657, + "loss": 0.7615088820457458, + "loss_ce": 3.428545824135654e-05, + "loss_iou": 0.251953125, + "loss_num": 0.051513671875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 77067552, + "step": 1230 + }, + { + "epoch": 4.096505823627288, + "grad_norm": 24.48037338256836, + "learning_rate": 5e-06, + "loss": 0.7931, + "num_input_tokens_seen": 77130584, + "step": 1231 + }, + { + "epoch": 4.096505823627288, + "loss": 0.9837879538536072, + "loss_ce": 2.3307909941649996e-05, + "loss_iou": 0.345703125, + "loss_num": 0.05810546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 77130584, + "step": 1231 + }, + { + "epoch": 4.099833610648918, + "grad_norm": 35.03629684448242, + "learning_rate": 5e-06, + "loss": 1.0287, + "num_input_tokens_seen": 77194704, + "step": 1232 + }, + { + "epoch": 4.099833610648918, + "loss": 1.1050645112991333, + "loss_ce": 0.0008164606406353414, + "loss_iou": 0.365234375, + "loss_num": 0.07470703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 77194704, + "step": 1232 + }, + { + "epoch": 4.103161397670549, + "grad_norm": 47.826690673828125, + "learning_rate": 5e-06, + "loss": 0.7708, + "num_input_tokens_seen": 77258880, + "step": 1233 + }, + { + "epoch": 4.103161397670549, + "loss": 0.8139855861663818, + "loss_ce": 0.0007531545124948025, + "loss_iou": 0.326171875, + "loss_num": 0.031982421875, + "loss_xval": 0.8125, + "num_input_tokens_seen": 77258880, + "step": 1233 + }, + { + "epoch": 4.10648918469218, + "grad_norm": 29.535999298095703, + "learning_rate": 5e-06, + "loss": 0.7673, + "num_input_tokens_seen": 77321428, + "step": 1234 + }, + { + "epoch": 4.10648918469218, + "loss": 0.7830252647399902, + "loss_ce": 6.632165604969487e-05, + "loss_iou": 0.294921875, + "loss_num": 0.038818359375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 77321428, + "step": 1234 + }, + { + "epoch": 4.10981697171381, + "grad_norm": 21.408437728881836, + "learning_rate": 5e-06, + "loss": 0.7928, + "num_input_tokens_seen": 77383604, + "step": 1235 + }, + { + "epoch": 4.10981697171381, + "loss": 0.7510450482368469, + "loss_ce": 0.0008008688455447555, + "loss_iou": 0.259765625, + "loss_num": 0.0458984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 77383604, + "step": 1235 + }, + { + "epoch": 4.113144758735441, + "grad_norm": 15.90986442565918, + "learning_rate": 5e-06, + "loss": 0.4682, + "num_input_tokens_seen": 77446016, + "step": 1236 + }, + { + "epoch": 4.113144758735441, + "loss": 0.3090360760688782, + "loss_ce": 0.0005643867189064622, + "loss_iou": 0.0654296875, + "loss_num": 0.035400390625, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 77446016, + "step": 1236 + }, + { + "epoch": 4.116472545757071, + "grad_norm": 22.74576187133789, + "learning_rate": 5e-06, + "loss": 0.9686, + "num_input_tokens_seen": 77511136, + "step": 1237 + }, + { + "epoch": 4.116472545757071, + "loss": 1.0496145486831665, + "loss_ce": 0.00029816440655849874, + "loss_iou": 0.3359375, + "loss_num": 0.0751953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 77511136, + "step": 1237 + }, + { + "epoch": 4.119800332778702, + "grad_norm": 27.88201332092285, + "learning_rate": 5e-06, + "loss": 0.5876, + "num_input_tokens_seen": 77574216, + "step": 1238 + }, + { + "epoch": 4.119800332778702, + "loss": 0.6051973700523376, + "loss_ce": 0.00046104774810373783, + "loss_iou": 0.21875, + "loss_num": 0.033447265625, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 77574216, + "step": 1238 + }, + { + "epoch": 4.123128119800333, + "grad_norm": 11.923408508300781, + "learning_rate": 5e-06, + "loss": 0.6994, + "num_input_tokens_seen": 77636964, + "step": 1239 + }, + { + "epoch": 4.123128119800333, + "loss": 0.9356486201286316, + "loss_ce": 0.0003458983264863491, + "loss_iou": 0.2890625, + "loss_num": 0.0712890625, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 77636964, + "step": 1239 + }, + { + "epoch": 4.126455906821963, + "grad_norm": 14.89736270904541, + "learning_rate": 5e-06, + "loss": 0.4658, + "num_input_tokens_seen": 77697900, + "step": 1240 + }, + { + "epoch": 4.126455906821963, + "loss": 0.5524719953536987, + "loss_ce": 0.009991498664021492, + "loss_iou": 0.19140625, + "loss_num": 0.031982421875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 77697900, + "step": 1240 + }, + { + "epoch": 4.129783693843594, + "grad_norm": 7.385460376739502, + "learning_rate": 5e-06, + "loss": 0.7427, + "num_input_tokens_seen": 77760148, + "step": 1241 + }, + { + "epoch": 4.129783693843594, + "loss": 0.9304953813552856, + "loss_ce": 0.0008078092359937727, + "loss_iou": 0.3046875, + "loss_num": 0.0634765625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 77760148, + "step": 1241 + }, + { + "epoch": 4.1331114808652245, + "grad_norm": 11.04951286315918, + "learning_rate": 5e-06, + "loss": 0.6882, + "num_input_tokens_seen": 77821680, + "step": 1242 + }, + { + "epoch": 4.1331114808652245, + "loss": 0.5894361734390259, + "loss_ce": 1.963950853678398e-05, + "loss_iou": 0.19140625, + "loss_num": 0.04150390625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 77821680, + "step": 1242 + }, + { + "epoch": 4.136439267886855, + "grad_norm": 41.49568176269531, + "learning_rate": 5e-06, + "loss": 0.9372, + "num_input_tokens_seen": 77884232, + "step": 1243 + }, + { + "epoch": 4.136439267886855, + "loss": 0.7601950168609619, + "loss_ce": 0.0006735285860486329, + "loss_iou": 0.28125, + "loss_num": 0.0390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 77884232, + "step": 1243 + }, + { + "epoch": 4.139767054908486, + "grad_norm": 7.810762405395508, + "learning_rate": 5e-06, + "loss": 0.6704, + "num_input_tokens_seen": 77947048, + "step": 1244 + }, + { + "epoch": 4.139767054908486, + "loss": 0.817348062992096, + "loss_ce": 0.00057560222921893, + "loss_iou": 0.265625, + "loss_num": 0.056640625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 77947048, + "step": 1244 + }, + { + "epoch": 4.143094841930116, + "grad_norm": 14.463872909545898, + "learning_rate": 5e-06, + "loss": 0.5967, + "num_input_tokens_seen": 78007588, + "step": 1245 + }, + { + "epoch": 4.143094841930116, + "loss": 0.4404696822166443, + "loss_ce": 3.99709097109735e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.03466796875, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 78007588, + "step": 1245 + }, + { + "epoch": 4.146422628951747, + "grad_norm": 11.963314056396484, + "learning_rate": 5e-06, + "loss": 0.7085, + "num_input_tokens_seen": 78069416, + "step": 1246 + }, + { + "epoch": 4.146422628951747, + "loss": 0.6378343105316162, + "loss_ce": 1.6925867384998128e-05, + "loss_iou": 0.2109375, + "loss_num": 0.043212890625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 78069416, + "step": 1246 + }, + { + "epoch": 4.149750415973378, + "grad_norm": 23.427759170532227, + "learning_rate": 5e-06, + "loss": 0.6977, + "num_input_tokens_seen": 78132404, + "step": 1247 + }, + { + "epoch": 4.149750415973378, + "loss": 0.7420227527618408, + "loss_ce": 0.00038455973844975233, + "loss_iou": 0.2314453125, + "loss_num": 0.0556640625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 78132404, + "step": 1247 + }, + { + "epoch": 4.153078202995008, + "grad_norm": 19.418766021728516, + "learning_rate": 5e-06, + "loss": 0.5784, + "num_input_tokens_seen": 78194872, + "step": 1248 + }, + { + "epoch": 4.153078202995008, + "loss": 0.4353361427783966, + "loss_ce": 0.00021654315060004592, + "loss_iou": 0.11181640625, + "loss_num": 0.042236328125, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 78194872, + "step": 1248 + }, + { + "epoch": 4.156405990016639, + "grad_norm": 12.20608139038086, + "learning_rate": 5e-06, + "loss": 0.8433, + "num_input_tokens_seen": 78258412, + "step": 1249 + }, + { + "epoch": 4.156405990016639, + "loss": 0.7217040061950684, + "loss_ce": 2.4295461116707884e-05, + "loss_iou": 0.263671875, + "loss_num": 0.038818359375, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 78258412, + "step": 1249 + }, + { + "epoch": 4.159733777038269, + "grad_norm": 12.527824401855469, + "learning_rate": 5e-06, + "loss": 0.7033, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "eval_seeclick_CIoU": 0.12239253148436546, + "eval_seeclick_GIoU": 0.1511169895529747, + "eval_seeclick_IoU": 0.21942590922117233, + "eval_seeclick_MAE_all": 0.18183603137731552, + "eval_seeclick_MAE_h": 0.05301396735012531, + "eval_seeclick_MAE_w": 0.11510565504431725, + "eval_seeclick_MAE_x_boxes": 0.26491403579711914, + "eval_seeclick_MAE_y_boxes": 0.14476536214351654, + "eval_seeclick_NUM_probability": 0.9971157312393188, + "eval_seeclick_inside_bbox": 0.2718750014901161, + "eval_seeclick_loss": 2.71364426612854, + "eval_seeclick_loss_ce": 0.0916629321873188, + "eval_seeclick_loss_iou": 0.8514404296875, + "eval_seeclick_loss_num": 0.18135833740234375, + "eval_seeclick_loss_xval": 2.6103515625, + "eval_seeclick_runtime": 65.2703, + "eval_seeclick_samples_per_second": 0.72, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "eval_icons_CIoU": 0.03201650455594063, + "eval_icons_GIoU": 0.14019615948200226, + "eval_icons_IoU": 0.1814664751291275, + "eval_icons_MAE_all": 0.1366742141544819, + "eval_icons_MAE_h": 0.053603796288371086, + "eval_icons_MAE_w": 0.13712040334939957, + "eval_icons_MAE_x_boxes": 0.16213426738977432, + "eval_icons_MAE_y_boxes": 0.042389593087136745, + "eval_icons_NUM_probability": 0.9999926686286926, + "eval_icons_inside_bbox": 0.3576388955116272, + "eval_icons_loss": 2.41593861579895, + "eval_icons_loss_ce": 1.236241587321274e-06, + "eval_icons_loss_iou": 0.845458984375, + "eval_icons_loss_num": 0.13992691040039062, + "eval_icons_loss_xval": 2.38916015625, + "eval_icons_runtime": 64.8775, + "eval_icons_samples_per_second": 0.771, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "eval_screenspot_CIoU": 0.05670065308610598, + "eval_screenspot_GIoU": 0.12369907399018605, + "eval_screenspot_IoU": 0.20270906388759613, + "eval_screenspot_MAE_all": 0.20509299635887146, + "eval_screenspot_MAE_h": 0.0750602458914121, + "eval_screenspot_MAE_w": 0.1766246110200882, + "eval_screenspot_MAE_x_boxes": 0.26625461379686993, + "eval_screenspot_MAE_y_boxes": 0.14071540037790933, + "eval_screenspot_NUM_probability": 0.9999165137608846, + "eval_screenspot_inside_bbox": 0.3387500047683716, + "eval_screenspot_loss": 2.7868411540985107, + "eval_screenspot_loss_ce": 0.0001397205999940828, + "eval_screenspot_loss_iou": 0.8883463541666666, + "eval_screenspot_loss_num": 0.21124267578125, + "eval_screenspot_loss_xval": 2.8326822916666665, + "eval_screenspot_runtime": 112.2559, + "eval_screenspot_samples_per_second": 0.793, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "eval_compot_CIoU": -0.04526886157691479, + "eval_compot_GIoU": 0.07369405962526798, + "eval_compot_IoU": 0.12557204440236092, + "eval_compot_MAE_all": 0.21759440749883652, + "eval_compot_MAE_h": 0.0656399242579937, + "eval_compot_MAE_w": 0.24755792319774628, + "eval_compot_MAE_x_boxes": 0.19112109392881393, + "eval_compot_MAE_y_boxes": 0.13230591267347336, + "eval_compot_NUM_probability": 0.9999650120735168, + "eval_compot_inside_bbox": 0.2395833358168602, + "eval_compot_loss": 2.8879714012145996, + "eval_compot_loss_ce": 0.005962656578049064, + "eval_compot_loss_iou": 0.923095703125, + "eval_compot_loss_num": 0.205718994140625, + "eval_compot_loss_xval": 2.873046875, + "eval_compot_runtime": 74.5769, + "eval_compot_samples_per_second": 0.67, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "eval_custom_ui_MAE_all": 0.07984927669167519, + "eval_custom_ui_MAE_x": 0.07491207309067249, + "eval_custom_ui_MAE_y": 0.08478647843003273, + "eval_custom_ui_NUM_probability": 0.9999922215938568, + "eval_custom_ui_loss": 0.38728201389312744, + "eval_custom_ui_loss_ce": 1.5295036746465485e-05, + "eval_custom_ui_loss_num": 0.0752716064453125, + "eval_custom_ui_loss_xval": 0.37652587890625, + "eval_custom_ui_runtime": 58.5282, + "eval_custom_ui_samples_per_second": 0.854, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.159733777038269, + "loss": 0.3760015368461609, + "loss_ce": 2.4953300453489646e-05, + "loss_iou": 0.0, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 78322204, + "step": 1250 + }, + { + "epoch": 4.1630615640599, + "grad_norm": 8.160618782043457, + "learning_rate": 5e-06, + "loss": 0.5385, + "num_input_tokens_seen": 78384488, + "step": 1251 + }, + { + "epoch": 4.1630615640599, + "loss": 0.5602893829345703, + "loss_ce": 0.0011768110562115908, + "loss_iou": 0.166015625, + "loss_num": 0.045166015625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 78384488, + "step": 1251 + }, + { + "epoch": 4.166389351081531, + "grad_norm": 36.30681610107422, + "learning_rate": 5e-06, + "loss": 0.9031, + "num_input_tokens_seen": 78447932, + "step": 1252 + }, + { + "epoch": 4.166389351081531, + "loss": 0.8445026874542236, + "loss_ce": 0.00026445844559930265, + "loss_iou": 0.298828125, + "loss_num": 0.049560546875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 78447932, + "step": 1252 + }, + { + "epoch": 4.169717138103161, + "grad_norm": 49.61948776245117, + "learning_rate": 5e-06, + "loss": 0.9137, + "num_input_tokens_seen": 78513152, + "step": 1253 + }, + { + "epoch": 4.169717138103161, + "loss": 0.8838903903961182, + "loss_ce": 0.0010779636213555932, + "loss_iou": 0.310546875, + "loss_num": 0.052734375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 78513152, + "step": 1253 + }, + { + "epoch": 4.173044925124792, + "grad_norm": 29.25813102722168, + "learning_rate": 5e-06, + "loss": 0.4711, + "num_input_tokens_seen": 78574284, + "step": 1254 + }, + { + "epoch": 4.173044925124792, + "loss": 0.4923175275325775, + "loss_ce": 0.0004657130048144609, + "loss_iou": 0.1318359375, + "loss_num": 0.045654296875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 78574284, + "step": 1254 + }, + { + "epoch": 4.1763727121464225, + "grad_norm": 20.463504791259766, + "learning_rate": 5e-06, + "loss": 0.6105, + "num_input_tokens_seen": 78635596, + "step": 1255 + }, + { + "epoch": 4.1763727121464225, + "loss": 0.667670488357544, + "loss_ce": 0.0014106841990724206, + "loss_iou": 0.23828125, + "loss_num": 0.0380859375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 78635596, + "step": 1255 + }, + { + "epoch": 4.179700499168053, + "grad_norm": 18.627574920654297, + "learning_rate": 5e-06, + "loss": 0.6598, + "num_input_tokens_seen": 78697748, + "step": 1256 + }, + { + "epoch": 4.179700499168053, + "loss": 0.6276933550834656, + "loss_ce": 0.00021382489649113268, + "loss_iou": 0.23046875, + "loss_num": 0.033203125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 78697748, + "step": 1256 + }, + { + "epoch": 4.183028286189684, + "grad_norm": 8.834410667419434, + "learning_rate": 5e-06, + "loss": 0.5655, + "num_input_tokens_seen": 78759760, + "step": 1257 + }, + { + "epoch": 4.183028286189684, + "loss": 0.6625989079475403, + "loss_ce": 0.0004895288147963583, + "loss_iou": 0.2275390625, + "loss_num": 0.04150390625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 78759760, + "step": 1257 + }, + { + "epoch": 4.186356073211314, + "grad_norm": 14.236786842346191, + "learning_rate": 5e-06, + "loss": 0.6359, + "num_input_tokens_seen": 78823028, + "step": 1258 + }, + { + "epoch": 4.186356073211314, + "loss": 0.6722902059555054, + "loss_ce": 4.9012665840564296e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0267333984375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 78823028, + "step": 1258 + }, + { + "epoch": 4.189683860232945, + "grad_norm": 83.77409362792969, + "learning_rate": 5e-06, + "loss": 0.5601, + "num_input_tokens_seen": 78885504, + "step": 1259 + }, + { + "epoch": 4.189683860232945, + "loss": 0.4171263575553894, + "loss_ce": 0.0001341925235465169, + "loss_iou": 0.146484375, + "loss_num": 0.02490234375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 78885504, + "step": 1259 + }, + { + "epoch": 4.1930116472545755, + "grad_norm": 12.761661529541016, + "learning_rate": 5e-06, + "loss": 0.8203, + "num_input_tokens_seen": 78947504, + "step": 1260 + }, + { + "epoch": 4.1930116472545755, + "loss": 0.7472410202026367, + "loss_ce": 4.86068420286756e-05, + "loss_iou": 0.216796875, + "loss_num": 0.062255859375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 78947504, + "step": 1260 + }, + { + "epoch": 4.196339434276206, + "grad_norm": 12.434175491333008, + "learning_rate": 5e-06, + "loss": 0.6015, + "num_input_tokens_seen": 79006968, + "step": 1261 + }, + { + "epoch": 4.196339434276206, + "loss": 0.5894126296043396, + "loss_ce": 0.0007895919261500239, + "loss_iou": 0.173828125, + "loss_num": 0.048095703125, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 79006968, + "step": 1261 + }, + { + "epoch": 4.199667221297837, + "grad_norm": 10.954483985900879, + "learning_rate": 5e-06, + "loss": 0.9019, + "num_input_tokens_seen": 79068724, + "step": 1262 + }, + { + "epoch": 4.199667221297837, + "loss": 0.8818979859352112, + "loss_ce": 0.0005503545398823917, + "loss_iou": 0.31640625, + "loss_num": 0.0498046875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 79068724, + "step": 1262 + }, + { + "epoch": 4.202995008319467, + "grad_norm": 10.366689682006836, + "learning_rate": 5e-06, + "loss": 0.5979, + "num_input_tokens_seen": 79127728, + "step": 1263 + }, + { + "epoch": 4.202995008319467, + "loss": 0.7296203970909119, + "loss_ce": 6.130624115030514e-06, + "loss_iou": 0.224609375, + "loss_num": 0.055908203125, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 79127728, + "step": 1263 + }, + { + "epoch": 4.206322795341098, + "grad_norm": 9.267184257507324, + "learning_rate": 5e-06, + "loss": 0.7216, + "num_input_tokens_seen": 79190896, + "step": 1264 + }, + { + "epoch": 4.206322795341098, + "loss": 0.5524642467498779, + "loss_ce": 0.00021812476916238666, + "loss_iou": 0.1728515625, + "loss_num": 0.041015625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 79190896, + "step": 1264 + }, + { + "epoch": 4.209650582362729, + "grad_norm": 9.680049896240234, + "learning_rate": 5e-06, + "loss": 0.7248, + "num_input_tokens_seen": 79253700, + "step": 1265 + }, + { + "epoch": 4.209650582362729, + "loss": 0.6744685173034668, + "loss_ce": 0.00015209820412565023, + "loss_iou": 0.263671875, + "loss_num": 0.0291748046875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 79253700, + "step": 1265 + }, + { + "epoch": 4.212978369384359, + "grad_norm": 13.558832168579102, + "learning_rate": 5e-06, + "loss": 0.8204, + "num_input_tokens_seen": 79316324, + "step": 1266 + }, + { + "epoch": 4.212978369384359, + "loss": 0.8318663835525513, + "loss_ce": 0.00020139635307714343, + "loss_iou": 0.294921875, + "loss_num": 0.048583984375, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 79316324, + "step": 1266 + }, + { + "epoch": 4.21630615640599, + "grad_norm": 34.612335205078125, + "learning_rate": 5e-06, + "loss": 0.6904, + "num_input_tokens_seen": 79379112, + "step": 1267 + }, + { + "epoch": 4.21630615640599, + "loss": 0.7487878799438477, + "loss_ce": 8.608042662672233e-06, + "loss_iou": 0.2451171875, + "loss_num": 0.0517578125, + "loss_xval": 0.75, + "num_input_tokens_seen": 79379112, + "step": 1267 + }, + { + "epoch": 4.21963394342762, + "grad_norm": 13.187520027160645, + "learning_rate": 5e-06, + "loss": 0.7209, + "num_input_tokens_seen": 79441800, + "step": 1268 + }, + { + "epoch": 4.21963394342762, + "loss": 0.6884113550186157, + "loss_ce": 0.0010334014659747481, + "loss_iou": 0.23828125, + "loss_num": 0.042236328125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 79441800, + "step": 1268 + }, + { + "epoch": 4.222961730449251, + "grad_norm": 29.02869415283203, + "learning_rate": 5e-06, + "loss": 0.7215, + "num_input_tokens_seen": 79503748, + "step": 1269 + }, + { + "epoch": 4.222961730449251, + "loss": 0.7619832754135132, + "loss_ce": 2.0363593648653477e-05, + "loss_iou": 0.287109375, + "loss_num": 0.03759765625, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 79503748, + "step": 1269 + }, + { + "epoch": 4.226289517470882, + "grad_norm": 56.70915603637695, + "learning_rate": 5e-06, + "loss": 0.6429, + "num_input_tokens_seen": 79564680, + "step": 1270 + }, + { + "epoch": 4.226289517470882, + "loss": 0.7590841054916382, + "loss_ce": 5.0929971621371806e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.07275390625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 79564680, + "step": 1270 + }, + { + "epoch": 4.229617304492512, + "grad_norm": 16.210630416870117, + "learning_rate": 5e-06, + "loss": 0.7066, + "num_input_tokens_seen": 79626448, + "step": 1271 + }, + { + "epoch": 4.229617304492512, + "loss": 0.6228231191635132, + "loss_ce": 0.001607277779839933, + "loss_iou": 0.20703125, + "loss_num": 0.04150390625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 79626448, + "step": 1271 + }, + { + "epoch": 4.232945091514143, + "grad_norm": 26.057252883911133, + "learning_rate": 5e-06, + "loss": 0.6158, + "num_input_tokens_seen": 79689628, + "step": 1272 + }, + { + "epoch": 4.232945091514143, + "loss": 0.6330506801605225, + "loss_ce": 0.00023816811153665185, + "loss_iou": 0.248046875, + "loss_num": 0.0272216796875, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 79689628, + "step": 1272 + }, + { + "epoch": 4.2362728785357735, + "grad_norm": 8.330951690673828, + "learning_rate": 5e-06, + "loss": 0.75, + "num_input_tokens_seen": 79752856, + "step": 1273 + }, + { + "epoch": 4.2362728785357735, + "loss": 0.9343417286872864, + "loss_ce": 1.557775613036938e-05, + "loss_iou": 0.359375, + "loss_num": 0.04296875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 79752856, + "step": 1273 + }, + { + "epoch": 4.239600665557404, + "grad_norm": 28.429784774780273, + "learning_rate": 5e-06, + "loss": 0.7537, + "num_input_tokens_seen": 79813784, + "step": 1274 + }, + { + "epoch": 4.239600665557404, + "loss": 0.5956960916519165, + "loss_ce": 0.0004812246188521385, + "loss_iou": 0.177734375, + "loss_num": 0.0478515625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 79813784, + "step": 1274 + }, + { + "epoch": 4.242928452579035, + "grad_norm": 14.092952728271484, + "learning_rate": 5e-06, + "loss": 0.8371, + "num_input_tokens_seen": 79878556, + "step": 1275 + }, + { + "epoch": 4.242928452579035, + "loss": 0.9099881052970886, + "loss_ce": 7.601917604915798e-05, + "loss_iou": 0.353515625, + "loss_num": 0.041015625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 79878556, + "step": 1275 + }, + { + "epoch": 4.246256239600665, + "grad_norm": 25.563735961914062, + "learning_rate": 5e-06, + "loss": 0.7424, + "num_input_tokens_seen": 79940316, + "step": 1276 + }, + { + "epoch": 4.246256239600665, + "loss": 0.7878633737564087, + "loss_ce": 2.1567015210166574e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.059814453125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 79940316, + "step": 1276 + }, + { + "epoch": 4.249584026622296, + "grad_norm": 16.13930892944336, + "learning_rate": 5e-06, + "loss": 0.6181, + "num_input_tokens_seen": 80002848, + "step": 1277 + }, + { + "epoch": 4.249584026622296, + "loss": 0.6337130069732666, + "loss_ce": 0.00016809771477710456, + "loss_iou": 0.20703125, + "loss_num": 0.0439453125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 80002848, + "step": 1277 + }, + { + "epoch": 4.252911813643927, + "grad_norm": 13.609673500061035, + "learning_rate": 5e-06, + "loss": 0.8332, + "num_input_tokens_seen": 80065168, + "step": 1278 + }, + { + "epoch": 4.252911813643927, + "loss": 0.688213586807251, + "loss_ce": 0.00022533058654516935, + "loss_iou": 0.22265625, + "loss_num": 0.04833984375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 80065168, + "step": 1278 + }, + { + "epoch": 4.256239600665557, + "grad_norm": 13.89177417755127, + "learning_rate": 5e-06, + "loss": 0.7432, + "num_input_tokens_seen": 80126552, + "step": 1279 + }, + { + "epoch": 4.256239600665557, + "loss": 0.7881263494491577, + "loss_ce": 4.041851207148284e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0546875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 80126552, + "step": 1279 + }, + { + "epoch": 4.259567387687188, + "grad_norm": 9.584406852722168, + "learning_rate": 5e-06, + "loss": 0.7185, + "num_input_tokens_seen": 80189328, + "step": 1280 + }, + { + "epoch": 4.259567387687188, + "loss": 0.6353198289871216, + "loss_ce": 0.0009204413508996367, + "loss_iou": 0.2333984375, + "loss_num": 0.033447265625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 80189328, + "step": 1280 + }, + { + "epoch": 4.262895174708818, + "grad_norm": 8.969870567321777, + "learning_rate": 5e-06, + "loss": 0.783, + "num_input_tokens_seen": 80252808, + "step": 1281 + }, + { + "epoch": 4.262895174708818, + "loss": 0.9145519137382507, + "loss_ce": 0.0019542670343071222, + "loss_iou": 0.32421875, + "loss_num": 0.052734375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 80252808, + "step": 1281 + }, + { + "epoch": 4.266222961730449, + "grad_norm": 19.581363677978516, + "learning_rate": 5e-06, + "loss": 0.5597, + "num_input_tokens_seen": 80315084, + "step": 1282 + }, + { + "epoch": 4.266222961730449, + "loss": 0.49742501974105835, + "loss_ce": 1.9012808479601517e-05, + "loss_iou": 0.0751953125, + "loss_num": 0.0693359375, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 80315084, + "step": 1282 + }, + { + "epoch": 4.26955074875208, + "grad_norm": 18.102149963378906, + "learning_rate": 5e-06, + "loss": 0.6311, + "num_input_tokens_seen": 80378400, + "step": 1283 + }, + { + "epoch": 4.26955074875208, + "loss": 0.47788214683532715, + "loss_ce": 9.894504910334945e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0289306640625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 80378400, + "step": 1283 + }, + { + "epoch": 4.27287853577371, + "grad_norm": 14.705218315124512, + "learning_rate": 5e-06, + "loss": 0.7301, + "num_input_tokens_seen": 80440412, + "step": 1284 + }, + { + "epoch": 4.27287853577371, + "loss": 0.6811676025390625, + "loss_ce": 1.5225450624711812e-05, + "loss_iou": 0.248046875, + "loss_num": 0.036865234375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 80440412, + "step": 1284 + }, + { + "epoch": 4.276206322795341, + "grad_norm": 11.81163501739502, + "learning_rate": 5e-06, + "loss": 0.7499, + "num_input_tokens_seen": 80503416, + "step": 1285 + }, + { + "epoch": 4.276206322795341, + "loss": 0.718535304069519, + "loss_ce": 2.9458708013407886e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0294189453125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 80503416, + "step": 1285 + }, + { + "epoch": 4.2795341098169715, + "grad_norm": 11.332803726196289, + "learning_rate": 5e-06, + "loss": 0.6372, + "num_input_tokens_seen": 80566256, + "step": 1286 + }, + { + "epoch": 4.2795341098169715, + "loss": 0.6951990127563477, + "loss_ce": 0.0007410263060592115, + "loss_iou": 0.248046875, + "loss_num": 0.039794921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 80566256, + "step": 1286 + }, + { + "epoch": 4.282861896838602, + "grad_norm": 12.372754096984863, + "learning_rate": 5e-06, + "loss": 0.6649, + "num_input_tokens_seen": 80628540, + "step": 1287 + }, + { + "epoch": 4.282861896838602, + "loss": 0.8295961618423462, + "loss_ce": 0.0007388241938315332, + "loss_iou": 0.318359375, + "loss_num": 0.0380859375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 80628540, + "step": 1287 + }, + { + "epoch": 4.286189683860233, + "grad_norm": 7.756584167480469, + "learning_rate": 5e-06, + "loss": 0.6643, + "num_input_tokens_seen": 80690812, + "step": 1288 + }, + { + "epoch": 4.286189683860233, + "loss": 0.6631507277488708, + "loss_ce": 0.001529662637040019, + "loss_iou": 0.2333984375, + "loss_num": 0.038818359375, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 80690812, + "step": 1288 + }, + { + "epoch": 4.289517470881863, + "grad_norm": 12.011344909667969, + "learning_rate": 5e-06, + "loss": 0.8091, + "num_input_tokens_seen": 80754852, + "step": 1289 + }, + { + "epoch": 4.289517470881863, + "loss": 0.5517739057540894, + "loss_ce": 0.00026023387908935547, + "loss_iou": 0.201171875, + "loss_num": 0.030029296875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 80754852, + "step": 1289 + }, + { + "epoch": 4.292845257903494, + "grad_norm": 14.856667518615723, + "learning_rate": 5e-06, + "loss": 0.7544, + "num_input_tokens_seen": 80816168, + "step": 1290 + }, + { + "epoch": 4.292845257903494, + "loss": 0.5227793455123901, + "loss_ce": 0.000226860967813991, + "loss_iou": 0.16015625, + "loss_num": 0.040283203125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 80816168, + "step": 1290 + }, + { + "epoch": 4.2961730449251245, + "grad_norm": 10.020024299621582, + "learning_rate": 5e-06, + "loss": 0.7188, + "num_input_tokens_seen": 80880640, + "step": 1291 + }, + { + "epoch": 4.2961730449251245, + "loss": 0.760665237903595, + "loss_ce": 0.0022423923946917057, + "loss_iou": 0.26953125, + "loss_num": 0.04345703125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 80880640, + "step": 1291 + }, + { + "epoch": 4.299500831946755, + "grad_norm": 11.497454643249512, + "learning_rate": 5e-06, + "loss": 0.5628, + "num_input_tokens_seen": 80943176, + "step": 1292 + }, + { + "epoch": 4.299500831946755, + "loss": 0.687080979347229, + "loss_ce": 6.930784729775041e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.042724609375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 80943176, + "step": 1292 + }, + { + "epoch": 4.302828618968386, + "grad_norm": 12.758271217346191, + "learning_rate": 5e-06, + "loss": 0.8416, + "num_input_tokens_seen": 81007208, + "step": 1293 + }, + { + "epoch": 4.302828618968386, + "loss": 0.7446075677871704, + "loss_ce": 0.0004669097252190113, + "loss_iou": 0.275390625, + "loss_num": 0.038818359375, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 81007208, + "step": 1293 + }, + { + "epoch": 4.306156405990016, + "grad_norm": 52.66012954711914, + "learning_rate": 5e-06, + "loss": 0.5894, + "num_input_tokens_seen": 81067976, + "step": 1294 + }, + { + "epoch": 4.306156405990016, + "loss": 0.37765905261039734, + "loss_ce": 0.0007974714390002191, + "loss_iou": 0.10693359375, + "loss_num": 0.03271484375, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 81067976, + "step": 1294 + }, + { + "epoch": 4.309484193011647, + "grad_norm": 12.44371509552002, + "learning_rate": 5e-06, + "loss": 0.5179, + "num_input_tokens_seen": 81128904, + "step": 1295 + }, + { + "epoch": 4.309484193011647, + "loss": 0.6360405683517456, + "loss_ce": 5.427756696008146e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.0306396484375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 81128904, + "step": 1295 + }, + { + "epoch": 4.312811980033278, + "grad_norm": 24.886842727661133, + "learning_rate": 5e-06, + "loss": 0.705, + "num_input_tokens_seen": 81191592, + "step": 1296 + }, + { + "epoch": 4.312811980033278, + "loss": 0.7338913083076477, + "loss_ce": 4.5570727706945036e-06, + "loss_iou": 0.2890625, + "loss_num": 0.03076171875, + "loss_xval": 0.734375, + "num_input_tokens_seen": 81191592, + "step": 1296 + }, + { + "epoch": 4.316139767054908, + "grad_norm": 30.665220260620117, + "learning_rate": 5e-06, + "loss": 0.7969, + "num_input_tokens_seen": 81255356, + "step": 1297 + }, + { + "epoch": 4.316139767054908, + "loss": 0.7743918895721436, + "loss_ce": 0.001564753009006381, + "loss_iou": 0.25390625, + "loss_num": 0.05322265625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 81255356, + "step": 1297 + }, + { + "epoch": 4.319467554076539, + "grad_norm": 29.902305603027344, + "learning_rate": 5e-06, + "loss": 0.8438, + "num_input_tokens_seen": 81318216, + "step": 1298 + }, + { + "epoch": 4.319467554076539, + "loss": 0.7068020105361938, + "loss_ce": 1.4880570233799517e-05, + "loss_iou": 0.251953125, + "loss_num": 0.040283203125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 81318216, + "step": 1298 + }, + { + "epoch": 4.322795341098169, + "grad_norm": 13.050071716308594, + "learning_rate": 5e-06, + "loss": 0.6442, + "num_input_tokens_seen": 81381692, + "step": 1299 + }, + { + "epoch": 4.322795341098169, + "loss": 0.3645215630531311, + "loss_ce": 1.962293026736006e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.02099609375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 81381692, + "step": 1299 + }, + { + "epoch": 4.3261231281198, + "grad_norm": 14.20059871673584, + "learning_rate": 5e-06, + "loss": 0.6968, + "num_input_tokens_seen": 81445260, + "step": 1300 + }, + { + "epoch": 4.3261231281198, + "loss": 0.9003438353538513, + "loss_ce": 0.0004414235008880496, + "loss_iou": 0.314453125, + "loss_num": 0.0546875, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 81445260, + "step": 1300 + }, + { + "epoch": 4.329450915141431, + "grad_norm": 10.232033729553223, + "learning_rate": 5e-06, + "loss": 0.6684, + "num_input_tokens_seen": 81507220, + "step": 1301 + }, + { + "epoch": 4.329450915141431, + "loss": 0.922446608543396, + "loss_ce": 0.00044959314982406795, + "loss_iou": 0.30078125, + "loss_num": 0.06396484375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 81507220, + "step": 1301 + }, + { + "epoch": 4.332778702163061, + "grad_norm": 8.437857627868652, + "learning_rate": 5e-06, + "loss": 0.5038, + "num_input_tokens_seen": 81570812, + "step": 1302 + }, + { + "epoch": 4.332778702163061, + "loss": 0.6514133214950562, + "loss_ce": 4.610713222064078e-05, + "loss_iou": 0.2314453125, + "loss_num": 0.037353515625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 81570812, + "step": 1302 + }, + { + "epoch": 4.336106489184692, + "grad_norm": 9.5712308883667, + "learning_rate": 5e-06, + "loss": 0.9257, + "num_input_tokens_seen": 81634604, + "step": 1303 + }, + { + "epoch": 4.336106489184692, + "loss": 1.1645619869232178, + "loss_ce": 0.0007436010637320578, + "loss_iou": 0.376953125, + "loss_num": 0.08203125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 81634604, + "step": 1303 + }, + { + "epoch": 4.3394342762063225, + "grad_norm": 24.645009994506836, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 81698128, + "step": 1304 + }, + { + "epoch": 4.3394342762063225, + "loss": 0.7591370940208435, + "loss_ce": 0.0015687549021095037, + "loss_iou": 0.244140625, + "loss_num": 0.0537109375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 81698128, + "step": 1304 + }, + { + "epoch": 4.342762063227953, + "grad_norm": 18.755535125732422, + "learning_rate": 5e-06, + "loss": 0.8487, + "num_input_tokens_seen": 81760788, + "step": 1305 + }, + { + "epoch": 4.342762063227953, + "loss": 0.7894872426986694, + "loss_ce": 0.001157181104645133, + "loss_iou": 0.248046875, + "loss_num": 0.05859375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 81760788, + "step": 1305 + }, + { + "epoch": 4.346089850249584, + "grad_norm": 11.36084270477295, + "learning_rate": 5e-06, + "loss": 0.8839, + "num_input_tokens_seen": 81823160, + "step": 1306 + }, + { + "epoch": 4.346089850249584, + "loss": 0.6701819896697998, + "loss_ce": 1.5993844499462284e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0303955078125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 81823160, + "step": 1306 + }, + { + "epoch": 4.349417637271214, + "grad_norm": 14.648335456848145, + "learning_rate": 5e-06, + "loss": 0.96, + "num_input_tokens_seen": 81885560, + "step": 1307 + }, + { + "epoch": 4.349417637271214, + "loss": 1.0643436908721924, + "loss_ce": 0.001599527895450592, + "loss_iou": 0.322265625, + "loss_num": 0.083984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 81885560, + "step": 1307 + }, + { + "epoch": 4.352745424292845, + "grad_norm": 15.327980995178223, + "learning_rate": 5e-06, + "loss": 0.5124, + "num_input_tokens_seen": 81947560, + "step": 1308 + }, + { + "epoch": 4.352745424292845, + "loss": 0.28150349855422974, + "loss_ce": 0.0004366114444565028, + "loss_iou": 0.04931640625, + "loss_num": 0.036376953125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 81947560, + "step": 1308 + }, + { + "epoch": 4.356073211314476, + "grad_norm": 12.198101043701172, + "learning_rate": 5e-06, + "loss": 0.6174, + "num_input_tokens_seen": 82008732, + "step": 1309 + }, + { + "epoch": 4.356073211314476, + "loss": 0.6670141816139221, + "loss_ce": 0.001486798282712698, + "loss_iou": 0.2197265625, + "loss_num": 0.045166015625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 82008732, + "step": 1309 + }, + { + "epoch": 4.359400998336106, + "grad_norm": 12.514129638671875, + "learning_rate": 5e-06, + "loss": 0.6429, + "num_input_tokens_seen": 82071748, + "step": 1310 + }, + { + "epoch": 4.359400998336106, + "loss": 0.5801935195922852, + "loss_ce": 5.4305866797221825e-05, + "loss_iou": 0.1875, + "loss_num": 0.041015625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 82071748, + "step": 1310 + }, + { + "epoch": 4.362728785357737, + "grad_norm": 12.292180061340332, + "learning_rate": 5e-06, + "loss": 0.7611, + "num_input_tokens_seen": 82135220, + "step": 1311 + }, + { + "epoch": 4.362728785357737, + "loss": 0.6303189992904663, + "loss_ce": 0.00043624168029055, + "loss_iou": 0.26953125, + "loss_num": 0.018310546875, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 82135220, + "step": 1311 + }, + { + "epoch": 4.366056572379367, + "grad_norm": 19.755245208740234, + "learning_rate": 5e-06, + "loss": 0.9127, + "num_input_tokens_seen": 82198120, + "step": 1312 + }, + { + "epoch": 4.366056572379367, + "loss": 0.985588788986206, + "loss_ce": 0.0004814087296836078, + "loss_iou": 0.36328125, + "loss_num": 0.052001953125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 82198120, + "step": 1312 + }, + { + "epoch": 4.369384359400998, + "grad_norm": 16.86263656616211, + "learning_rate": 5e-06, + "loss": 0.6468, + "num_input_tokens_seen": 82260076, + "step": 1313 + }, + { + "epoch": 4.369384359400998, + "loss": 0.682517409324646, + "loss_ce": 0.000388490705518052, + "loss_iou": 0.224609375, + "loss_num": 0.046630859375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 82260076, + "step": 1313 + }, + { + "epoch": 4.372712146422629, + "grad_norm": 9.756426811218262, + "learning_rate": 5e-06, + "loss": 0.9654, + "num_input_tokens_seen": 82322316, + "step": 1314 + }, + { + "epoch": 4.372712146422629, + "loss": 0.9413772821426392, + "loss_ce": 0.00021517441200558096, + "loss_iou": 0.318359375, + "loss_num": 0.060546875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 82322316, + "step": 1314 + }, + { + "epoch": 4.376039933444259, + "grad_norm": 11.446276664733887, + "learning_rate": 5e-06, + "loss": 0.6246, + "num_input_tokens_seen": 82386100, + "step": 1315 + }, + { + "epoch": 4.376039933444259, + "loss": 0.7032071352005005, + "loss_ce": 0.0006314606289379299, + "loss_iou": 0.267578125, + "loss_num": 0.033203125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 82386100, + "step": 1315 + }, + { + "epoch": 4.37936772046589, + "grad_norm": 11.805602073669434, + "learning_rate": 5e-06, + "loss": 0.8133, + "num_input_tokens_seen": 82448276, + "step": 1316 + }, + { + "epoch": 4.37936772046589, + "loss": 0.5263853669166565, + "loss_ce": 1.8209135305369273e-05, + "loss_iou": 0.1376953125, + "loss_num": 0.05029296875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 82448276, + "step": 1316 + }, + { + "epoch": 4.3826955074875205, + "grad_norm": 14.017489433288574, + "learning_rate": 5e-06, + "loss": 0.7262, + "num_input_tokens_seen": 82509572, + "step": 1317 + }, + { + "epoch": 4.3826955074875205, + "loss": 0.7585628032684326, + "loss_ce": 0.0005061531555838883, + "loss_iou": 0.2138671875, + "loss_num": 0.06640625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 82509572, + "step": 1317 + }, + { + "epoch": 4.386023294509151, + "grad_norm": 9.340023040771484, + "learning_rate": 5e-06, + "loss": 0.9733, + "num_input_tokens_seen": 82572988, + "step": 1318 + }, + { + "epoch": 4.386023294509151, + "loss": 0.9580419063568115, + "loss_ce": 0.00015617434110026807, + "loss_iou": 0.294921875, + "loss_num": 0.07373046875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 82572988, + "step": 1318 + }, + { + "epoch": 4.389351081530782, + "grad_norm": 23.777090072631836, + "learning_rate": 5e-06, + "loss": 0.6908, + "num_input_tokens_seen": 82635976, + "step": 1319 + }, + { + "epoch": 4.389351081530782, + "loss": 0.8914474248886108, + "loss_ce": 0.0010665201116353273, + "loss_iou": 0.34375, + "loss_num": 0.040771484375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 82635976, + "step": 1319 + }, + { + "epoch": 4.392678868552412, + "grad_norm": 22.563859939575195, + "learning_rate": 5e-06, + "loss": 0.6377, + "num_input_tokens_seen": 82700040, + "step": 1320 + }, + { + "epoch": 4.392678868552412, + "loss": 0.6494787931442261, + "loss_ce": 0.002262030728161335, + "loss_iou": 0.201171875, + "loss_num": 0.048828125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 82700040, + "step": 1320 + }, + { + "epoch": 4.396006655574043, + "grad_norm": 16.087520599365234, + "learning_rate": 5e-06, + "loss": 0.677, + "num_input_tokens_seen": 82763308, + "step": 1321 + }, + { + "epoch": 4.396006655574043, + "loss": 0.8195084929466248, + "loss_ce": 0.0006608610274270177, + "loss_iou": 0.275390625, + "loss_num": 0.0537109375, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 82763308, + "step": 1321 + }, + { + "epoch": 4.3993344425956735, + "grad_norm": 15.090987205505371, + "learning_rate": 5e-06, + "loss": 0.6979, + "num_input_tokens_seen": 82826744, + "step": 1322 + }, + { + "epoch": 4.3993344425956735, + "loss": 0.4208059310913086, + "loss_ce": 2.9584578442154452e-05, + "loss_iou": 0.1337890625, + "loss_num": 0.030517578125, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 82826744, + "step": 1322 + }, + { + "epoch": 4.402662229617304, + "grad_norm": 14.373215675354004, + "learning_rate": 5e-06, + "loss": 0.7911, + "num_input_tokens_seen": 82889604, + "step": 1323 + }, + { + "epoch": 4.402662229617304, + "loss": 0.6858798265457153, + "loss_ce": 0.0013094794703647494, + "loss_iou": 0.279296875, + "loss_num": 0.025390625, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 82889604, + "step": 1323 + }, + { + "epoch": 4.405990016638935, + "grad_norm": 29.24193572998047, + "learning_rate": 5e-06, + "loss": 0.9223, + "num_input_tokens_seen": 82952864, + "step": 1324 + }, + { + "epoch": 4.405990016638935, + "loss": 1.03691828250885, + "loss_ce": 5.295296432450414e-05, + "loss_iou": 0.408203125, + "loss_num": 0.04443359375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 82952864, + "step": 1324 + }, + { + "epoch": 4.409317803660565, + "grad_norm": 14.150858879089355, + "learning_rate": 5e-06, + "loss": 0.7532, + "num_input_tokens_seen": 83015712, + "step": 1325 + }, + { + "epoch": 4.409317803660565, + "loss": 0.8280280232429504, + "loss_ce": 0.00020820634381379932, + "loss_iou": 0.322265625, + "loss_num": 0.037109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 83015712, + "step": 1325 + }, + { + "epoch": 4.412645590682196, + "grad_norm": 29.858686447143555, + "learning_rate": 5e-06, + "loss": 0.6685, + "num_input_tokens_seen": 83078676, + "step": 1326 + }, + { + "epoch": 4.412645590682196, + "loss": 0.7052803039550781, + "loss_ce": 0.00044628471368923783, + "loss_iou": 0.25390625, + "loss_num": 0.039794921875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 83078676, + "step": 1326 + }, + { + "epoch": 4.415973377703827, + "grad_norm": 15.250243186950684, + "learning_rate": 5e-06, + "loss": 0.6323, + "num_input_tokens_seen": 83141596, + "step": 1327 + }, + { + "epoch": 4.415973377703827, + "loss": 0.8748631477355957, + "loss_ce": 0.00022933242144063115, + "loss_iou": 0.30078125, + "loss_num": 0.054931640625, + "loss_xval": 0.875, + "num_input_tokens_seen": 83141596, + "step": 1327 + }, + { + "epoch": 4.419301164725457, + "grad_norm": 17.645246505737305, + "learning_rate": 5e-06, + "loss": 0.7055, + "num_input_tokens_seen": 83202652, + "step": 1328 + }, + { + "epoch": 4.419301164725457, + "loss": 0.8196154832839966, + "loss_ce": 3.535650466801599e-05, + "loss_iou": 0.2421875, + "loss_num": 0.06689453125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 83202652, + "step": 1328 + }, + { + "epoch": 4.422628951747088, + "grad_norm": 10.930619239807129, + "learning_rate": 5e-06, + "loss": 0.5952, + "num_input_tokens_seen": 83266684, + "step": 1329 + }, + { + "epoch": 4.422628951747088, + "loss": 0.5401097536087036, + "loss_ce": 7.067422848194838e-05, + "loss_iou": 0.1904296875, + "loss_num": 0.031982421875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 83266684, + "step": 1329 + }, + { + "epoch": 4.425956738768718, + "grad_norm": 21.14279556274414, + "learning_rate": 5e-06, + "loss": 0.7999, + "num_input_tokens_seen": 83330424, + "step": 1330 + }, + { + "epoch": 4.425956738768718, + "loss": 0.5837583541870117, + "loss_ce": 1.807482840376906e-05, + "loss_iou": 0.1982421875, + "loss_num": 0.037353515625, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 83330424, + "step": 1330 + }, + { + "epoch": 4.429284525790349, + "grad_norm": 18.78822135925293, + "learning_rate": 5e-06, + "loss": 0.7177, + "num_input_tokens_seen": 83394464, + "step": 1331 + }, + { + "epoch": 4.429284525790349, + "loss": 0.7250956296920776, + "loss_ce": 0.00024215054872911423, + "loss_iou": 0.232421875, + "loss_num": 0.052001953125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 83394464, + "step": 1331 + }, + { + "epoch": 4.43261231281198, + "grad_norm": 17.859045028686523, + "learning_rate": 5e-06, + "loss": 0.5746, + "num_input_tokens_seen": 83456320, + "step": 1332 + }, + { + "epoch": 4.43261231281198, + "loss": 0.5926786065101624, + "loss_ce": 2.7240082999924198e-05, + "loss_iou": 0.193359375, + "loss_num": 0.04150390625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 83456320, + "step": 1332 + }, + { + "epoch": 4.43594009983361, + "grad_norm": 28.78057861328125, + "learning_rate": 5e-06, + "loss": 0.725, + "num_input_tokens_seen": 83519880, + "step": 1333 + }, + { + "epoch": 4.43594009983361, + "loss": 0.6718736886978149, + "loss_ce": 0.0007310921791940928, + "loss_iou": 0.23828125, + "loss_num": 0.038818359375, + "loss_xval": 0.671875, + "num_input_tokens_seen": 83519880, + "step": 1333 + }, + { + "epoch": 4.439267886855241, + "grad_norm": 13.343917846679688, + "learning_rate": 5e-06, + "loss": 0.6955, + "num_input_tokens_seen": 83581528, + "step": 1334 + }, + { + "epoch": 4.439267886855241, + "loss": 0.6046224236488342, + "loss_ce": 0.00013025110820308328, + "loss_iou": 0.197265625, + "loss_num": 0.041748046875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 83581528, + "step": 1334 + }, + { + "epoch": 4.4425956738768715, + "grad_norm": 24.18421745300293, + "learning_rate": 5e-06, + "loss": 0.9336, + "num_input_tokens_seen": 83645572, + "step": 1335 + }, + { + "epoch": 4.4425956738768715, + "loss": 0.9804202318191528, + "loss_ce": 0.00019555243488866836, + "loss_iou": 0.37109375, + "loss_num": 0.048095703125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 83645572, + "step": 1335 + }, + { + "epoch": 4.445923460898502, + "grad_norm": 19.95655632019043, + "learning_rate": 5e-06, + "loss": 0.6435, + "num_input_tokens_seen": 83708536, + "step": 1336 + }, + { + "epoch": 4.445923460898502, + "loss": 0.7947807312011719, + "loss_ce": 0.00046916649444028735, + "loss_iou": 0.29296875, + "loss_num": 0.0419921875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 83708536, + "step": 1336 + }, + { + "epoch": 4.449251247920133, + "grad_norm": 13.971914291381836, + "learning_rate": 5e-06, + "loss": 0.6339, + "num_input_tokens_seen": 83771940, + "step": 1337 + }, + { + "epoch": 4.449251247920133, + "loss": 0.627832293510437, + "loss_ce": 2.4645694793434814e-05, + "loss_iou": 0.21875, + "loss_num": 0.03759765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 83771940, + "step": 1337 + }, + { + "epoch": 4.452579034941763, + "grad_norm": 28.279212951660156, + "learning_rate": 5e-06, + "loss": 0.5821, + "num_input_tokens_seen": 83833204, + "step": 1338 + }, + { + "epoch": 4.452579034941763, + "loss": 0.6125773191452026, + "loss_ce": 0.00015060522127896547, + "loss_iou": 0.126953125, + "loss_num": 0.07177734375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 83833204, + "step": 1338 + }, + { + "epoch": 4.455906821963394, + "grad_norm": 24.268396377563477, + "learning_rate": 5e-06, + "loss": 0.82, + "num_input_tokens_seen": 83895096, + "step": 1339 + }, + { + "epoch": 4.455906821963394, + "loss": 0.8381412029266357, + "loss_ce": 6.506794306915253e-06, + "loss_iou": 0.265625, + "loss_num": 0.0615234375, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 83895096, + "step": 1339 + }, + { + "epoch": 4.4592346089850246, + "grad_norm": 8.350409507751465, + "learning_rate": 5e-06, + "loss": 0.8535, + "num_input_tokens_seen": 83957376, + "step": 1340 + }, + { + "epoch": 4.4592346089850246, + "loss": 1.058552861213684, + "loss_ce": 0.0008135715615935624, + "loss_iou": 0.376953125, + "loss_num": 0.060546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 83957376, + "step": 1340 + }, + { + "epoch": 4.462562396006655, + "grad_norm": 17.044376373291016, + "learning_rate": 5e-06, + "loss": 0.767, + "num_input_tokens_seen": 84020204, + "step": 1341 + }, + { + "epoch": 4.462562396006655, + "loss": 0.725034236907959, + "loss_ce": 0.0011573644587770104, + "loss_iou": 0.2392578125, + "loss_num": 0.04931640625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 84020204, + "step": 1341 + }, + { + "epoch": 4.465890183028286, + "grad_norm": 11.960426330566406, + "learning_rate": 5e-06, + "loss": 0.9019, + "num_input_tokens_seen": 84085084, + "step": 1342 + }, + { + "epoch": 4.465890183028286, + "loss": 0.8089953660964966, + "loss_ce": 0.0013781640445813537, + "loss_iou": 0.263671875, + "loss_num": 0.05615234375, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 84085084, + "step": 1342 + }, + { + "epoch": 4.469217970049916, + "grad_norm": 13.375765800476074, + "learning_rate": 5e-06, + "loss": 0.6083, + "num_input_tokens_seen": 84147448, + "step": 1343 + }, + { + "epoch": 4.469217970049916, + "loss": 0.7444367408752441, + "loss_ce": 0.0001740536536090076, + "loss_iou": 0.267578125, + "loss_num": 0.04150390625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 84147448, + "step": 1343 + }, + { + "epoch": 4.472545757071547, + "grad_norm": 24.798673629760742, + "learning_rate": 5e-06, + "loss": 1.0596, + "num_input_tokens_seen": 84211792, + "step": 1344 + }, + { + "epoch": 4.472545757071547, + "loss": 1.0900293588638306, + "loss_ce": 0.001162198605015874, + "loss_iou": 0.3671875, + "loss_num": 0.07080078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 84211792, + "step": 1344 + }, + { + "epoch": 4.475873544093178, + "grad_norm": 46.3792610168457, + "learning_rate": 5e-06, + "loss": 0.7852, + "num_input_tokens_seen": 84274836, + "step": 1345 + }, + { + "epoch": 4.475873544093178, + "loss": 0.5791232585906982, + "loss_ce": 2.174517430830747e-05, + "loss_iou": 0.216796875, + "loss_num": 0.029052734375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 84274836, + "step": 1345 + }, + { + "epoch": 4.479201331114808, + "grad_norm": 19.27472496032715, + "learning_rate": 5e-06, + "loss": 0.5985, + "num_input_tokens_seen": 84338324, + "step": 1346 + }, + { + "epoch": 4.479201331114808, + "loss": 0.7217234373092651, + "loss_ce": 0.0005319793708622456, + "loss_iou": 0.28515625, + "loss_num": 0.0299072265625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 84338324, + "step": 1346 + }, + { + "epoch": 4.482529118136439, + "grad_norm": 20.793636322021484, + "learning_rate": 5e-06, + "loss": 0.7234, + "num_input_tokens_seen": 84400104, + "step": 1347 + }, + { + "epoch": 4.482529118136439, + "loss": 0.6533622741699219, + "loss_ce": 0.00040814554085955024, + "loss_iou": 0.2060546875, + "loss_num": 0.048095703125, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 84400104, + "step": 1347 + }, + { + "epoch": 4.4858569051580695, + "grad_norm": 12.040908813476562, + "learning_rate": 5e-06, + "loss": 0.815, + "num_input_tokens_seen": 84462732, + "step": 1348 + }, + { + "epoch": 4.4858569051580695, + "loss": 0.7836884260177612, + "loss_ce": 0.00048530942876823246, + "loss_iou": 0.265625, + "loss_num": 0.05029296875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 84462732, + "step": 1348 + }, + { + "epoch": 4.4891846921797, + "grad_norm": 18.848194122314453, + "learning_rate": 5e-06, + "loss": 0.8167, + "num_input_tokens_seen": 84526312, + "step": 1349 + }, + { + "epoch": 4.4891846921797, + "loss": 0.8186991214752197, + "loss_ce": 0.0005839316290803254, + "loss_iou": 0.267578125, + "loss_num": 0.056640625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 84526312, + "step": 1349 + }, + { + "epoch": 4.492512479201331, + "grad_norm": 16.940589904785156, + "learning_rate": 5e-06, + "loss": 0.8814, + "num_input_tokens_seen": 84589304, + "step": 1350 + }, + { + "epoch": 4.492512479201331, + "loss": 0.7323105335235596, + "loss_ce": 0.0004990027518942952, + "loss_iou": 0.244140625, + "loss_num": 0.048583984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 84589304, + "step": 1350 + }, + { + "epoch": 4.495840266222961, + "grad_norm": 12.424930572509766, + "learning_rate": 5e-06, + "loss": 0.6786, + "num_input_tokens_seen": 84653056, + "step": 1351 + }, + { + "epoch": 4.495840266222961, + "loss": 0.594731330871582, + "loss_ce": 4.724020072899293e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.03173828125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 84653056, + "step": 1351 + }, + { + "epoch": 4.499168053244592, + "grad_norm": 8.971871376037598, + "learning_rate": 5e-06, + "loss": 0.719, + "num_input_tokens_seen": 84715604, + "step": 1352 + }, + { + "epoch": 4.499168053244592, + "loss": 0.7548971176147461, + "loss_ce": 0.00025852146791294217, + "loss_iou": 0.251953125, + "loss_num": 0.0498046875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 84715604, + "step": 1352 + }, + { + "epoch": 4.5024958402662225, + "grad_norm": 25.51188087463379, + "learning_rate": 5e-06, + "loss": 0.5393, + "num_input_tokens_seen": 84778444, + "step": 1353 + }, + { + "epoch": 4.5024958402662225, + "loss": 0.4582330584526062, + "loss_ce": 4.212010753690265e-05, + "loss_iou": 0.146484375, + "loss_num": 0.033203125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 84778444, + "step": 1353 + }, + { + "epoch": 4.505823627287853, + "grad_norm": 28.863351821899414, + "learning_rate": 5e-06, + "loss": 0.586, + "num_input_tokens_seen": 84841420, + "step": 1354 + }, + { + "epoch": 4.505823627287853, + "loss": 0.7277034521102905, + "loss_ce": 0.0015376773662865162, + "loss_iou": 0.2451171875, + "loss_num": 0.04736328125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 84841420, + "step": 1354 + }, + { + "epoch": 4.509151414309484, + "grad_norm": 11.419045448303223, + "learning_rate": 5e-06, + "loss": 0.9489, + "num_input_tokens_seen": 84905164, + "step": 1355 + }, + { + "epoch": 4.509151414309484, + "loss": 0.9230769872665405, + "loss_ce": 0.00022541567159350961, + "loss_iou": 0.318359375, + "loss_num": 0.05712890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 84905164, + "step": 1355 + }, + { + "epoch": 4.512479201331114, + "grad_norm": 8.96274471282959, + "learning_rate": 5e-06, + "loss": 0.7418, + "num_input_tokens_seen": 84969012, + "step": 1356 + }, + { + "epoch": 4.512479201331114, + "loss": 0.7852834463119507, + "loss_ce": 0.0008596146362833679, + "loss_iou": 0.25, + "loss_num": 0.056640625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 84969012, + "step": 1356 + }, + { + "epoch": 4.515806988352745, + "grad_norm": 8.02625560760498, + "learning_rate": 5e-06, + "loss": 0.6062, + "num_input_tokens_seen": 85031800, + "step": 1357 + }, + { + "epoch": 4.515806988352745, + "loss": 0.5827827453613281, + "loss_ce": 1.9044146029045805e-05, + "loss_iou": 0.201171875, + "loss_num": 0.036376953125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 85031800, + "step": 1357 + }, + { + "epoch": 4.519134775374376, + "grad_norm": 46.9227409362793, + "learning_rate": 5e-06, + "loss": 0.6798, + "num_input_tokens_seen": 85095404, + "step": 1358 + }, + { + "epoch": 4.519134775374376, + "loss": 0.7685559988021851, + "loss_ce": 0.00012336293002590537, + "loss_iou": 0.26171875, + "loss_num": 0.04931640625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 85095404, + "step": 1358 + }, + { + "epoch": 4.522462562396006, + "grad_norm": 34.97386932373047, + "learning_rate": 5e-06, + "loss": 0.6895, + "num_input_tokens_seen": 85158752, + "step": 1359 + }, + { + "epoch": 4.522462562396006, + "loss": 0.8442423343658447, + "loss_ce": 4.0554509723733645e-06, + "loss_iou": 0.33203125, + "loss_num": 0.036376953125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 85158752, + "step": 1359 + }, + { + "epoch": 4.525790349417637, + "grad_norm": 44.28080749511719, + "learning_rate": 5e-06, + "loss": 1.0204, + "num_input_tokens_seen": 85222508, + "step": 1360 + }, + { + "epoch": 4.525790349417637, + "loss": 0.9339932203292847, + "loss_ce": 3.327502417960204e-05, + "loss_iou": 0.341796875, + "loss_num": 0.050537109375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 85222508, + "step": 1360 + }, + { + "epoch": 4.529118136439267, + "grad_norm": 6.506110191345215, + "learning_rate": 5e-06, + "loss": 0.6121, + "num_input_tokens_seen": 85283528, + "step": 1361 + }, + { + "epoch": 4.529118136439267, + "loss": 0.5493087768554688, + "loss_ce": 0.00023652684467379004, + "loss_iou": 0.1865234375, + "loss_num": 0.03515625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 85283528, + "step": 1361 + }, + { + "epoch": 4.532445923460898, + "grad_norm": 10.900887489318848, + "learning_rate": 5e-06, + "loss": 0.7382, + "num_input_tokens_seen": 85347012, + "step": 1362 + }, + { + "epoch": 4.532445923460898, + "loss": 0.7557834386825562, + "loss_ce": 0.000931144692003727, + "loss_iou": 0.255859375, + "loss_num": 0.048583984375, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 85347012, + "step": 1362 + }, + { + "epoch": 4.535773710482529, + "grad_norm": 35.10896301269531, + "learning_rate": 5e-06, + "loss": 0.766, + "num_input_tokens_seen": 85409800, + "step": 1363 + }, + { + "epoch": 4.535773710482529, + "loss": 0.899711549282074, + "loss_ce": 0.0012740622041746974, + "loss_iou": 0.3671875, + "loss_num": 0.032470703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 85409800, + "step": 1363 + }, + { + "epoch": 4.539101497504159, + "grad_norm": 34.48576354980469, + "learning_rate": 5e-06, + "loss": 0.8018, + "num_input_tokens_seen": 85471848, + "step": 1364 + }, + { + "epoch": 4.539101497504159, + "loss": 0.7342795133590698, + "loss_ce": 0.0008810532744973898, + "loss_iou": 0.25390625, + "loss_num": 0.045166015625, + "loss_xval": 0.734375, + "num_input_tokens_seen": 85471848, + "step": 1364 + }, + { + "epoch": 4.54242928452579, + "grad_norm": 8.720282554626465, + "learning_rate": 5e-06, + "loss": 0.6187, + "num_input_tokens_seen": 85535028, + "step": 1365 + }, + { + "epoch": 4.54242928452579, + "loss": 0.6450221538543701, + "loss_ce": 2.5852832550299354e-06, + "loss_iou": 0.21484375, + "loss_num": 0.042724609375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 85535028, + "step": 1365 + }, + { + "epoch": 4.5457570715474205, + "grad_norm": 31.563127517700195, + "learning_rate": 5e-06, + "loss": 0.6799, + "num_input_tokens_seen": 85598856, + "step": 1366 + }, + { + "epoch": 4.5457570715474205, + "loss": 0.5118007659912109, + "loss_ce": 0.0013026673113927245, + "loss_iou": 0.1318359375, + "loss_num": 0.04931640625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 85598856, + "step": 1366 + }, + { + "epoch": 4.549084858569051, + "grad_norm": 16.306110382080078, + "learning_rate": 5e-06, + "loss": 0.4548, + "num_input_tokens_seen": 85661384, + "step": 1367 + }, + { + "epoch": 4.549084858569051, + "loss": 0.43176573514938354, + "loss_ce": 3.0542746571882162e-06, + "loss_iou": 0.1328125, + "loss_num": 0.033447265625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 85661384, + "step": 1367 + }, + { + "epoch": 4.552412645590682, + "grad_norm": 11.156771659851074, + "learning_rate": 5e-06, + "loss": 0.6683, + "num_input_tokens_seen": 85724720, + "step": 1368 + }, + { + "epoch": 4.552412645590682, + "loss": 0.8609325289726257, + "loss_ce": 0.0008250840473920107, + "loss_iou": 0.33203125, + "loss_num": 0.038818359375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 85724720, + "step": 1368 + }, + { + "epoch": 4.555740432612312, + "grad_norm": 11.832956314086914, + "learning_rate": 5e-06, + "loss": 0.4961, + "num_input_tokens_seen": 85786512, + "step": 1369 + }, + { + "epoch": 4.555740432612312, + "loss": 0.27058541774749756, + "loss_ce": 1.6586316633038223e-05, + "loss_iou": 0.0810546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 85786512, + "step": 1369 + }, + { + "epoch": 4.559068219633943, + "grad_norm": 10.155105590820312, + "learning_rate": 5e-06, + "loss": 0.7228, + "num_input_tokens_seen": 85849112, + "step": 1370 + }, + { + "epoch": 4.559068219633943, + "loss": 0.6971813440322876, + "loss_ce": 0.0015025895554572344, + "loss_iou": 0.2275390625, + "loss_num": 0.0478515625, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 85849112, + "step": 1370 + }, + { + "epoch": 4.5623960066555735, + "grad_norm": 13.69803524017334, + "learning_rate": 5e-06, + "loss": 0.6738, + "num_input_tokens_seen": 85912496, + "step": 1371 + }, + { + "epoch": 4.5623960066555735, + "loss": 0.6464178562164307, + "loss_ce": 0.00042175239650532603, + "loss_iou": 0.1982421875, + "loss_num": 0.050048828125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 85912496, + "step": 1371 + }, + { + "epoch": 4.565723793677205, + "grad_norm": 14.567577362060547, + "learning_rate": 5e-06, + "loss": 0.4595, + "num_input_tokens_seen": 85974008, + "step": 1372 + }, + { + "epoch": 4.565723793677205, + "loss": 0.5371776223182678, + "loss_ce": 0.00019030201656278223, + "loss_iou": 0.1796875, + "loss_num": 0.035400390625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 85974008, + "step": 1372 + }, + { + "epoch": 4.569051580698836, + "grad_norm": 11.320833206176758, + "learning_rate": 5e-06, + "loss": 0.4038, + "num_input_tokens_seen": 86036360, + "step": 1373 + }, + { + "epoch": 4.569051580698836, + "loss": 0.4081704616546631, + "loss_ce": 0.00021149149688426405, + "loss_iou": 0.1162109375, + "loss_num": 0.03515625, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 86036360, + "step": 1373 + }, + { + "epoch": 4.572379367720466, + "grad_norm": 8.902009010314941, + "learning_rate": 5e-06, + "loss": 0.6014, + "num_input_tokens_seen": 86098024, + "step": 1374 + }, + { + "epoch": 4.572379367720466, + "loss": 0.531557559967041, + "loss_ce": 2.3850705019867746e-06, + "loss_iou": 0.138671875, + "loss_num": 0.051025390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 86098024, + "step": 1374 + }, + { + "epoch": 4.575707154742097, + "grad_norm": 8.554510116577148, + "learning_rate": 5e-06, + "loss": 0.8192, + "num_input_tokens_seen": 86160932, + "step": 1375 + }, + { + "epoch": 4.575707154742097, + "loss": 1.052072525024414, + "loss_ce": 0.0005588348722085357, + "loss_iou": 0.380859375, + "loss_num": 0.057861328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 86160932, + "step": 1375 + }, + { + "epoch": 4.5790349417637275, + "grad_norm": 13.125633239746094, + "learning_rate": 5e-06, + "loss": 0.6643, + "num_input_tokens_seen": 86223500, + "step": 1376 + }, + { + "epoch": 4.5790349417637275, + "loss": 0.6385508179664612, + "loss_ce": 0.00015362344856839627, + "loss_iou": 0.23828125, + "loss_num": 0.0322265625, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 86223500, + "step": 1376 + }, + { + "epoch": 4.582362728785358, + "grad_norm": 8.183598518371582, + "learning_rate": 5e-06, + "loss": 0.5159, + "num_input_tokens_seen": 86284812, + "step": 1377 + }, + { + "epoch": 4.582362728785358, + "loss": 0.6321967840194702, + "loss_ce": 0.00036084238672629, + "loss_iou": 0.2412109375, + "loss_num": 0.0302734375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 86284812, + "step": 1377 + }, + { + "epoch": 4.585690515806989, + "grad_norm": 12.763681411743164, + "learning_rate": 5e-06, + "loss": 0.6332, + "num_input_tokens_seen": 86348508, + "step": 1378 + }, + { + "epoch": 4.585690515806989, + "loss": 0.5611585378646851, + "loss_ce": 0.00012342022091615945, + "loss_iou": 0.19921875, + "loss_num": 0.03271484375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 86348508, + "step": 1378 + }, + { + "epoch": 4.589018302828619, + "grad_norm": 6.641970634460449, + "learning_rate": 5e-06, + "loss": 0.5477, + "num_input_tokens_seen": 86409256, + "step": 1379 + }, + { + "epoch": 4.589018302828619, + "loss": 0.5002583265304565, + "loss_ce": 1.4131117495708168e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.033935546875, + "loss_xval": 0.5, + "num_input_tokens_seen": 86409256, + "step": 1379 + }, + { + "epoch": 4.59234608985025, + "grad_norm": 26.70740509033203, + "learning_rate": 5e-06, + "loss": 0.9453, + "num_input_tokens_seen": 86473068, + "step": 1380 + }, + { + "epoch": 4.59234608985025, + "loss": 1.0360589027404785, + "loss_ce": 4.8118923587026075e-05, + "loss_iou": 0.369140625, + "loss_num": 0.059814453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 86473068, + "step": 1380 + }, + { + "epoch": 4.595673876871881, + "grad_norm": 8.058368682861328, + "learning_rate": 5e-06, + "loss": 0.7622, + "num_input_tokens_seen": 86535376, + "step": 1381 + }, + { + "epoch": 4.595673876871881, + "loss": 0.6889675259590149, + "loss_ce": 2.6555962904240005e-06, + "loss_iou": 0.2421875, + "loss_num": 0.041259765625, + "loss_xval": 0.6875, + "num_input_tokens_seen": 86535376, + "step": 1381 + }, + { + "epoch": 4.599001663893511, + "grad_norm": 18.06012725830078, + "learning_rate": 5e-06, + "loss": 0.7193, + "num_input_tokens_seen": 86598728, + "step": 1382 + }, + { + "epoch": 4.599001663893511, + "loss": 0.5724185705184937, + "loss_ce": 0.00015296557103283703, + "loss_iou": 0.2001953125, + "loss_num": 0.034423828125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 86598728, + "step": 1382 + }, + { + "epoch": 4.602329450915142, + "grad_norm": 6.92100715637207, + "learning_rate": 5e-06, + "loss": 0.743, + "num_input_tokens_seen": 86661740, + "step": 1383 + }, + { + "epoch": 4.602329450915142, + "loss": 0.6485388875007629, + "loss_ce": 0.000711729924660176, + "loss_iou": 0.2001953125, + "loss_num": 0.04931640625, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 86661740, + "step": 1383 + }, + { + "epoch": 4.605657237936772, + "grad_norm": 14.679880142211914, + "learning_rate": 5e-06, + "loss": 0.7799, + "num_input_tokens_seen": 86726256, + "step": 1384 + }, + { + "epoch": 4.605657237936772, + "loss": 0.7865468859672546, + "loss_ce": 0.0009023505263030529, + "loss_iou": 0.3125, + "loss_num": 0.031982421875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 86726256, + "step": 1384 + }, + { + "epoch": 4.608985024958403, + "grad_norm": 15.619338989257812, + "learning_rate": 5e-06, + "loss": 0.5165, + "num_input_tokens_seen": 86787164, + "step": 1385 + }, + { + "epoch": 4.608985024958403, + "loss": 0.5341886281967163, + "loss_ce": 8.948968570621219e-06, + "loss_iou": 0.1689453125, + "loss_num": 0.0390625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 86787164, + "step": 1385 + }, + { + "epoch": 4.612312811980034, + "grad_norm": 11.672150611877441, + "learning_rate": 5e-06, + "loss": 0.6922, + "num_input_tokens_seen": 86849700, + "step": 1386 + }, + { + "epoch": 4.612312811980034, + "loss": 0.7309243679046631, + "loss_ce": 0.00045559878344647586, + "loss_iou": 0.21875, + "loss_num": 0.058837890625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 86849700, + "step": 1386 + }, + { + "epoch": 4.615640599001664, + "grad_norm": 10.545302391052246, + "learning_rate": 5e-06, + "loss": 0.9422, + "num_input_tokens_seen": 86911848, + "step": 1387 + }, + { + "epoch": 4.615640599001664, + "loss": 1.0675830841064453, + "loss_ce": 0.00026132259517908096, + "loss_iou": 0.404296875, + "loss_num": 0.0517578125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 86911848, + "step": 1387 + }, + { + "epoch": 4.618968386023295, + "grad_norm": 18.017080307006836, + "learning_rate": 5e-06, + "loss": 0.6596, + "num_input_tokens_seen": 86974584, + "step": 1388 + }, + { + "epoch": 4.618968386023295, + "loss": 0.7650818228721619, + "loss_ce": 0.0007995798951014876, + "loss_iou": 0.283203125, + "loss_num": 0.039794921875, + "loss_xval": 0.765625, + "num_input_tokens_seen": 86974584, + "step": 1388 + }, + { + "epoch": 4.6222961730449255, + "grad_norm": 7.790538787841797, + "learning_rate": 5e-06, + "loss": 0.7499, + "num_input_tokens_seen": 87035060, + "step": 1389 + }, + { + "epoch": 4.6222961730449255, + "loss": 0.7779638171195984, + "loss_ce": 9.695755579741672e-06, + "loss_iou": 0.25390625, + "loss_num": 0.053955078125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 87035060, + "step": 1389 + }, + { + "epoch": 4.625623960066556, + "grad_norm": 23.20652198791504, + "learning_rate": 5e-06, + "loss": 0.7455, + "num_input_tokens_seen": 87095904, + "step": 1390 + }, + { + "epoch": 4.625623960066556, + "loss": 0.7988479733467102, + "loss_ce": 0.0002639756421558559, + "loss_iou": 0.283203125, + "loss_num": 0.046142578125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 87095904, + "step": 1390 + }, + { + "epoch": 4.628951747088187, + "grad_norm": 17.283559799194336, + "learning_rate": 5e-06, + "loss": 0.6634, + "num_input_tokens_seen": 87158536, + "step": 1391 + }, + { + "epoch": 4.628951747088187, + "loss": 0.8823537230491638, + "loss_ce": 0.0007619569660164416, + "loss_iou": 0.267578125, + "loss_num": 0.06884765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 87158536, + "step": 1391 + }, + { + "epoch": 4.632279534109817, + "grad_norm": 12.51486587524414, + "learning_rate": 5e-06, + "loss": 0.7721, + "num_input_tokens_seen": 87221724, + "step": 1392 + }, + { + "epoch": 4.632279534109817, + "loss": 0.759742021560669, + "loss_ce": 9.84323924058117e-05, + "loss_iou": 0.189453125, + "loss_num": 0.076171875, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 87221724, + "step": 1392 + }, + { + "epoch": 4.635607321131448, + "grad_norm": 8.030847549438477, + "learning_rate": 5e-06, + "loss": 0.8579, + "num_input_tokens_seen": 87284540, + "step": 1393 + }, + { + "epoch": 4.635607321131448, + "loss": 0.8183420896530151, + "loss_ce": 0.0014476042706519365, + "loss_iou": 0.30078125, + "loss_num": 0.043212890625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 87284540, + "step": 1393 + }, + { + "epoch": 4.6389351081530785, + "grad_norm": 19.066869735717773, + "learning_rate": 5e-06, + "loss": 0.6132, + "num_input_tokens_seen": 87345048, + "step": 1394 + }, + { + "epoch": 4.6389351081530785, + "loss": 0.7153163552284241, + "loss_ce": 0.00047258762060664594, + "loss_iou": 0.23046875, + "loss_num": 0.05078125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 87345048, + "step": 1394 + }, + { + "epoch": 4.642262895174709, + "grad_norm": 7.339348316192627, + "learning_rate": 5e-06, + "loss": 0.647, + "num_input_tokens_seen": 87408380, + "step": 1395 + }, + { + "epoch": 4.642262895174709, + "loss": 0.777653694152832, + "loss_ce": 0.0004319966828916222, + "loss_iou": 0.2890625, + "loss_num": 0.039794921875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 87408380, + "step": 1395 + }, + { + "epoch": 4.64559068219634, + "grad_norm": 11.174449920654297, + "learning_rate": 5e-06, + "loss": 0.8674, + "num_input_tokens_seen": 87471892, + "step": 1396 + }, + { + "epoch": 4.64559068219634, + "loss": 0.796759307384491, + "loss_ce": 0.0002505228912923485, + "loss_iou": 0.267578125, + "loss_num": 0.052001953125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 87471892, + "step": 1396 + }, + { + "epoch": 4.64891846921797, + "grad_norm": 11.844344139099121, + "learning_rate": 5e-06, + "loss": 0.6369, + "num_input_tokens_seen": 87534040, + "step": 1397 + }, + { + "epoch": 4.64891846921797, + "loss": 0.6389685869216919, + "loss_ce": 0.0004188179736956954, + "loss_iou": 0.21875, + "loss_num": 0.04052734375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 87534040, + "step": 1397 + }, + { + "epoch": 4.652246256239601, + "grad_norm": 15.831585884094238, + "learning_rate": 5e-06, + "loss": 0.9046, + "num_input_tokens_seen": 87596240, + "step": 1398 + }, + { + "epoch": 4.652246256239601, + "loss": 0.7054827213287354, + "loss_ce": 0.0008928956813178957, + "loss_iou": 0.2138671875, + "loss_num": 0.0556640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 87596240, + "step": 1398 + }, + { + "epoch": 4.655574043261232, + "grad_norm": 6.208771705627441, + "learning_rate": 5e-06, + "loss": 0.6544, + "num_input_tokens_seen": 87657412, + "step": 1399 + }, + { + "epoch": 4.655574043261232, + "loss": 0.7047736644744873, + "loss_ce": 0.00042791658779606223, + "loss_iou": 0.208984375, + "loss_num": 0.05712890625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 87657412, + "step": 1399 + }, + { + "epoch": 4.658901830282862, + "grad_norm": 15.71760082244873, + "learning_rate": 5e-06, + "loss": 0.5694, + "num_input_tokens_seen": 87721156, + "step": 1400 + }, + { + "epoch": 4.658901830282862, + "loss": 0.4697321057319641, + "loss_ce": 0.0004938616184517741, + "loss_iou": 0.146484375, + "loss_num": 0.03515625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 87721156, + "step": 1400 + }, + { + "epoch": 4.662229617304493, + "grad_norm": 6.701347351074219, + "learning_rate": 5e-06, + "loss": 0.8324, + "num_input_tokens_seen": 87784928, + "step": 1401 + }, + { + "epoch": 4.662229617304493, + "loss": 0.851381778717041, + "loss_ce": 0.0012840689159929752, + "loss_iou": 0.294921875, + "loss_num": 0.052001953125, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 87784928, + "step": 1401 + }, + { + "epoch": 4.665557404326123, + "grad_norm": 9.888707160949707, + "learning_rate": 5e-06, + "loss": 0.6811, + "num_input_tokens_seen": 87846888, + "step": 1402 + }, + { + "epoch": 4.665557404326123, + "loss": 0.545545220375061, + "loss_ce": 0.0025764962192624807, + "loss_iou": 0.15625, + "loss_num": 0.046142578125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 87846888, + "step": 1402 + }, + { + "epoch": 4.668885191347754, + "grad_norm": 12.931994438171387, + "learning_rate": 5e-06, + "loss": 0.6414, + "num_input_tokens_seen": 87909236, + "step": 1403 + }, + { + "epoch": 4.668885191347754, + "loss": 0.5946015119552612, + "loss_ce": 0.000485275435494259, + "loss_iou": 0.208984375, + "loss_num": 0.03515625, + "loss_xval": 0.59375, + "num_input_tokens_seen": 87909236, + "step": 1403 + }, + { + "epoch": 4.672212978369385, + "grad_norm": 10.527796745300293, + "learning_rate": 5e-06, + "loss": 0.7106, + "num_input_tokens_seen": 87971780, + "step": 1404 + }, + { + "epoch": 4.672212978369385, + "loss": 0.6194738149642944, + "loss_ce": 0.002530462807044387, + "loss_iou": 0.1513671875, + "loss_num": 0.06298828125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 87971780, + "step": 1404 + }, + { + "epoch": 4.675540765391015, + "grad_norm": 7.018352031707764, + "learning_rate": 5e-06, + "loss": 0.8546, + "num_input_tokens_seen": 88034908, + "step": 1405 + }, + { + "epoch": 4.675540765391015, + "loss": 0.9526970386505127, + "loss_ce": 6.0325139202177525e-05, + "loss_iou": 0.298828125, + "loss_num": 0.07080078125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 88034908, + "step": 1405 + }, + { + "epoch": 4.678868552412646, + "grad_norm": 7.258470058441162, + "learning_rate": 5e-06, + "loss": 0.5011, + "num_input_tokens_seen": 88096500, + "step": 1406 + }, + { + "epoch": 4.678868552412646, + "loss": 0.65594881772995, + "loss_ce": 0.00018714150064624846, + "loss_iou": 0.166015625, + "loss_num": 0.06494140625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 88096500, + "step": 1406 + }, + { + "epoch": 4.6821963394342765, + "grad_norm": 12.497472763061523, + "learning_rate": 5e-06, + "loss": 0.5734, + "num_input_tokens_seen": 88157424, + "step": 1407 + }, + { + "epoch": 4.6821963394342765, + "loss": 0.5287968516349792, + "loss_ce": 0.0008427201537415385, + "loss_iou": 0.18359375, + "loss_num": 0.031982421875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 88157424, + "step": 1407 + }, + { + "epoch": 4.685524126455907, + "grad_norm": 13.06940746307373, + "learning_rate": 5e-06, + "loss": 0.6403, + "num_input_tokens_seen": 88219076, + "step": 1408 + }, + { + "epoch": 4.685524126455907, + "loss": 0.5052746534347534, + "loss_ce": 2.560833490861114e-05, + "loss_iou": 0.140625, + "loss_num": 0.044921875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 88219076, + "step": 1408 + }, + { + "epoch": 4.688851913477538, + "grad_norm": 8.848599433898926, + "learning_rate": 5e-06, + "loss": 0.5396, + "num_input_tokens_seen": 88281864, + "step": 1409 + }, + { + "epoch": 4.688851913477538, + "loss": 0.6765046119689941, + "loss_ce": 0.00011294549040030688, + "loss_iou": 0.1884765625, + "loss_num": 0.059814453125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 88281864, + "step": 1409 + }, + { + "epoch": 4.692179700499168, + "grad_norm": 8.952995300292969, + "learning_rate": 5e-06, + "loss": 0.4109, + "num_input_tokens_seen": 88342824, + "step": 1410 + }, + { + "epoch": 4.692179700499168, + "loss": 0.5194376707077026, + "loss_ce": 0.0006388693582266569, + "loss_iou": 0.1103515625, + "loss_num": 0.0595703125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 88342824, + "step": 1410 + }, + { + "epoch": 4.695507487520799, + "grad_norm": 11.815835952758789, + "learning_rate": 5e-06, + "loss": 0.7675, + "num_input_tokens_seen": 88405772, + "step": 1411 + }, + { + "epoch": 4.695507487520799, + "loss": 0.6887185573577881, + "loss_ce": 0.00018099366570822895, + "loss_iou": 0.2255859375, + "loss_num": 0.04736328125, + "loss_xval": 0.6875, + "num_input_tokens_seen": 88405772, + "step": 1411 + }, + { + "epoch": 4.6988352745424296, + "grad_norm": 8.99443531036377, + "learning_rate": 5e-06, + "loss": 0.6304, + "num_input_tokens_seen": 88468084, + "step": 1412 + }, + { + "epoch": 4.6988352745424296, + "loss": 0.8284512758255005, + "loss_ce": 8.219605660997331e-05, + "loss_iou": 0.244140625, + "loss_num": 0.06787109375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 88468084, + "step": 1412 + }, + { + "epoch": 4.70216306156406, + "grad_norm": 12.500134468078613, + "learning_rate": 5e-06, + "loss": 0.808, + "num_input_tokens_seen": 88528824, + "step": 1413 + }, + { + "epoch": 4.70216306156406, + "loss": 0.9188534021377563, + "loss_ce": 3.0147095458232798e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0703125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 88528824, + "step": 1413 + }, + { + "epoch": 4.705490848585691, + "grad_norm": 8.149920463562012, + "learning_rate": 5e-06, + "loss": 0.5456, + "num_input_tokens_seen": 88591064, + "step": 1414 + }, + { + "epoch": 4.705490848585691, + "loss": 0.4510113000869751, + "loss_ce": 2.2540578356711194e-05, + "loss_iou": 0.10595703125, + "loss_num": 0.0478515625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 88591064, + "step": 1414 + }, + { + "epoch": 4.708818635607321, + "grad_norm": 14.00053882598877, + "learning_rate": 5e-06, + "loss": 0.995, + "num_input_tokens_seen": 88653992, + "step": 1415 + }, + { + "epoch": 4.708818635607321, + "loss": 1.069594144821167, + "loss_ce": 1.4102173736318946e-05, + "loss_iou": 0.419921875, + "loss_num": 0.04638671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 88653992, + "step": 1415 + }, + { + "epoch": 4.712146422628952, + "grad_norm": 15.54645824432373, + "learning_rate": 5e-06, + "loss": 0.6461, + "num_input_tokens_seen": 88716116, + "step": 1416 + }, + { + "epoch": 4.712146422628952, + "loss": 0.41860735416412354, + "loss_ce": 0.0001808122469810769, + "loss_iou": 0.1484375, + "loss_num": 0.0244140625, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 88716116, + "step": 1416 + }, + { + "epoch": 4.715474209650583, + "grad_norm": 13.604162216186523, + "learning_rate": 5e-06, + "loss": 0.5831, + "num_input_tokens_seen": 88778872, + "step": 1417 + }, + { + "epoch": 4.715474209650583, + "loss": 0.6808904409408569, + "loss_ce": 0.00022634794004261494, + "loss_iou": 0.271484375, + "loss_num": 0.02783203125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 88778872, + "step": 1417 + }, + { + "epoch": 4.718801996672213, + "grad_norm": 22.623563766479492, + "learning_rate": 5e-06, + "loss": 0.5977, + "num_input_tokens_seen": 88840616, + "step": 1418 + }, + { + "epoch": 4.718801996672213, + "loss": 0.7182178497314453, + "loss_ce": 0.0010547826532274485, + "loss_iou": 0.2197265625, + "loss_num": 0.0556640625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 88840616, + "step": 1418 + }, + { + "epoch": 4.722129783693844, + "grad_norm": 25.521087646484375, + "learning_rate": 5e-06, + "loss": 0.9424, + "num_input_tokens_seen": 88904500, + "step": 1419 + }, + { + "epoch": 4.722129783693844, + "loss": 1.0386605262756348, + "loss_ce": 0.0005440718960016966, + "loss_iou": 0.365234375, + "loss_num": 0.0615234375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 88904500, + "step": 1419 + }, + { + "epoch": 4.7254575707154745, + "grad_norm": 32.94253921508789, + "learning_rate": 5e-06, + "loss": 0.6881, + "num_input_tokens_seen": 88966924, + "step": 1420 + }, + { + "epoch": 4.7254575707154745, + "loss": 0.8263764381408691, + "loss_ce": 0.000936914118938148, + "loss_iou": 0.2490234375, + "loss_num": 0.0654296875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 88966924, + "step": 1420 + }, + { + "epoch": 4.728785357737105, + "grad_norm": 12.381750106811523, + "learning_rate": 5e-06, + "loss": 0.6323, + "num_input_tokens_seen": 89030864, + "step": 1421 + }, + { + "epoch": 4.728785357737105, + "loss": 0.665398359298706, + "loss_ce": 0.00011519945110194385, + "loss_iou": 0.208984375, + "loss_num": 0.0498046875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 89030864, + "step": 1421 + }, + { + "epoch": 4.732113144758736, + "grad_norm": 12.26732349395752, + "learning_rate": 5e-06, + "loss": 0.5234, + "num_input_tokens_seen": 89092332, + "step": 1422 + }, + { + "epoch": 4.732113144758736, + "loss": 0.2868744730949402, + "loss_ce": 9.247070920537226e-06, + "loss_iou": 0.0380859375, + "loss_num": 0.042236328125, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 89092332, + "step": 1422 + }, + { + "epoch": 4.735440931780366, + "grad_norm": 7.801537990570068, + "learning_rate": 5e-06, + "loss": 0.8458, + "num_input_tokens_seen": 89155040, + "step": 1423 + }, + { + "epoch": 4.735440931780366, + "loss": 0.6569440364837646, + "loss_ce": 0.00038879140629433095, + "loss_iou": 0.2236328125, + "loss_num": 0.041748046875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 89155040, + "step": 1423 + }, + { + "epoch": 4.738768718801997, + "grad_norm": 17.682525634765625, + "learning_rate": 5e-06, + "loss": 0.7594, + "num_input_tokens_seen": 89217588, + "step": 1424 + }, + { + "epoch": 4.738768718801997, + "loss": 0.6541336178779602, + "loss_ce": 0.0006301991525106132, + "loss_iou": 0.228515625, + "loss_num": 0.039306640625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 89217588, + "step": 1424 + }, + { + "epoch": 4.7420965058236275, + "grad_norm": 19.01534080505371, + "learning_rate": 5e-06, + "loss": 0.7232, + "num_input_tokens_seen": 89281484, + "step": 1425 + }, + { + "epoch": 4.7420965058236275, + "loss": 0.5709552764892578, + "loss_ce": 0.0006427803309634328, + "loss_iou": 0.212890625, + "loss_num": 0.02880859375, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 89281484, + "step": 1425 + }, + { + "epoch": 4.745424292845258, + "grad_norm": 14.757177352905273, + "learning_rate": 5e-06, + "loss": 0.6833, + "num_input_tokens_seen": 89344164, + "step": 1426 + }, + { + "epoch": 4.745424292845258, + "loss": 0.8227342367172241, + "loss_ce": 0.00010239605035167187, + "loss_iou": 0.302734375, + "loss_num": 0.043701171875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 89344164, + "step": 1426 + }, + { + "epoch": 4.748752079866889, + "grad_norm": 16.073335647583008, + "learning_rate": 5e-06, + "loss": 0.7343, + "num_input_tokens_seen": 89407480, + "step": 1427 + }, + { + "epoch": 4.748752079866889, + "loss": 0.6605852842330933, + "loss_ce": 6.27873232588172e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.048095703125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 89407480, + "step": 1427 + }, + { + "epoch": 4.752079866888519, + "grad_norm": 12.472293853759766, + "learning_rate": 5e-06, + "loss": 0.7983, + "num_input_tokens_seen": 89470712, + "step": 1428 + }, + { + "epoch": 4.752079866888519, + "loss": 0.6767415404319763, + "loss_ce": 0.0004719903226941824, + "loss_iou": 0.2392578125, + "loss_num": 0.03955078125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 89470712, + "step": 1428 + }, + { + "epoch": 4.75540765391015, + "grad_norm": 19.07227325439453, + "learning_rate": 5e-06, + "loss": 0.8962, + "num_input_tokens_seen": 89533400, + "step": 1429 + }, + { + "epoch": 4.75540765391015, + "loss": 1.0112426280975342, + "loss_ce": 0.0002563234302215278, + "loss_iou": 0.298828125, + "loss_num": 0.08251953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 89533400, + "step": 1429 + }, + { + "epoch": 4.758735440931781, + "grad_norm": 11.964755058288574, + "learning_rate": 5e-06, + "loss": 0.7413, + "num_input_tokens_seen": 89597008, + "step": 1430 + }, + { + "epoch": 4.758735440931781, + "loss": 0.42090633511543274, + "loss_ce": 7.881029887357727e-06, + "loss_iou": 0.12451171875, + "loss_num": 0.034423828125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 89597008, + "step": 1430 + }, + { + "epoch": 4.762063227953411, + "grad_norm": 25.646238327026367, + "learning_rate": 5e-06, + "loss": 0.788, + "num_input_tokens_seen": 89660676, + "step": 1431 + }, + { + "epoch": 4.762063227953411, + "loss": 0.9184248447418213, + "loss_ce": 0.0015548146329820156, + "loss_iou": 0.3671875, + "loss_num": 0.036865234375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 89660676, + "step": 1431 + }, + { + "epoch": 4.765391014975042, + "grad_norm": 13.840594291687012, + "learning_rate": 5e-06, + "loss": 0.7977, + "num_input_tokens_seen": 89724804, + "step": 1432 + }, + { + "epoch": 4.765391014975042, + "loss": 0.9030267596244812, + "loss_ce": 0.00019475248700473458, + "loss_iou": 0.3515625, + "loss_num": 0.0400390625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 89724804, + "step": 1432 + }, + { + "epoch": 4.768718801996672, + "grad_norm": 7.904409885406494, + "learning_rate": 5e-06, + "loss": 0.6688, + "num_input_tokens_seen": 89787772, + "step": 1433 + }, + { + "epoch": 4.768718801996672, + "loss": 0.8160985708236694, + "loss_ce": 0.000180591203388758, + "loss_iou": 0.294921875, + "loss_num": 0.044921875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 89787772, + "step": 1433 + }, + { + "epoch": 4.772046589018303, + "grad_norm": 24.053165435791016, + "learning_rate": 5e-06, + "loss": 0.871, + "num_input_tokens_seen": 89851700, + "step": 1434 + }, + { + "epoch": 4.772046589018303, + "loss": 0.7983778715133667, + "loss_ce": 0.000526325951796025, + "loss_iou": 0.306640625, + "loss_num": 0.037109375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 89851700, + "step": 1434 + }, + { + "epoch": 4.775374376039934, + "grad_norm": 17.900188446044922, + "learning_rate": 5e-06, + "loss": 0.9376, + "num_input_tokens_seen": 89914596, + "step": 1435 + }, + { + "epoch": 4.775374376039934, + "loss": 1.1730791330337524, + "loss_ce": 0.00022753766097594053, + "loss_iou": 0.388671875, + "loss_num": 0.0791015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 89914596, + "step": 1435 + }, + { + "epoch": 4.778702163061564, + "grad_norm": 12.668126106262207, + "learning_rate": 5e-06, + "loss": 0.7976, + "num_input_tokens_seen": 89976928, + "step": 1436 + }, + { + "epoch": 4.778702163061564, + "loss": 0.8083900809288025, + "loss_ce": 0.0007728736381977797, + "loss_iou": 0.236328125, + "loss_num": 0.06689453125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 89976928, + "step": 1436 + }, + { + "epoch": 4.782029950083195, + "grad_norm": 13.394881248474121, + "learning_rate": 5e-06, + "loss": 0.7966, + "num_input_tokens_seen": 90040060, + "step": 1437 + }, + { + "epoch": 4.782029950083195, + "loss": 1.0120275020599365, + "loss_ce": 6.466710328822955e-05, + "loss_iou": 0.3203125, + "loss_num": 0.07470703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 90040060, + "step": 1437 + }, + { + "epoch": 4.7853577371048255, + "grad_norm": 11.221780776977539, + "learning_rate": 5e-06, + "loss": 0.7538, + "num_input_tokens_seen": 90103360, + "step": 1438 + }, + { + "epoch": 4.7853577371048255, + "loss": 0.7139836549758911, + "loss_ce": 0.0008489080937579274, + "loss_iou": 0.1953125, + "loss_num": 0.064453125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 90103360, + "step": 1438 + }, + { + "epoch": 4.788685524126456, + "grad_norm": 12.567187309265137, + "learning_rate": 5e-06, + "loss": 0.8503, + "num_input_tokens_seen": 90167208, + "step": 1439 + }, + { + "epoch": 4.788685524126456, + "loss": 0.9829078316688538, + "loss_ce": 0.00024178545572794974, + "loss_iou": 0.34765625, + "loss_num": 0.0576171875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 90167208, + "step": 1439 + }, + { + "epoch": 4.792013311148087, + "grad_norm": 12.651561737060547, + "learning_rate": 5e-06, + "loss": 0.5847, + "num_input_tokens_seen": 90229644, + "step": 1440 + }, + { + "epoch": 4.792013311148087, + "loss": 0.4671025276184082, + "loss_ce": 0.0011601548176258802, + "loss_iou": 0.1279296875, + "loss_num": 0.0419921875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 90229644, + "step": 1440 + }, + { + "epoch": 4.795341098169717, + "grad_norm": 16.65874671936035, + "learning_rate": 5e-06, + "loss": 0.7644, + "num_input_tokens_seen": 90291568, + "step": 1441 + }, + { + "epoch": 4.795341098169717, + "loss": 0.5984110236167908, + "loss_ce": 0.0002665033971425146, + "loss_iou": 0.2060546875, + "loss_num": 0.037109375, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 90291568, + "step": 1441 + }, + { + "epoch": 4.798668885191348, + "grad_norm": 25.318418502807617, + "learning_rate": 5e-06, + "loss": 0.6216, + "num_input_tokens_seen": 90355464, + "step": 1442 + }, + { + "epoch": 4.798668885191348, + "loss": 0.681909441947937, + "loss_ce": 8.57599443406798e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.039794921875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 90355464, + "step": 1442 + }, + { + "epoch": 4.8019966722129785, + "grad_norm": 9.664568901062012, + "learning_rate": 5e-06, + "loss": 0.6261, + "num_input_tokens_seen": 90418132, + "step": 1443 + }, + { + "epoch": 4.8019966722129785, + "loss": 0.6628125309944153, + "loss_ce": 0.001679704524576664, + "loss_iou": 0.20703125, + "loss_num": 0.049560546875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 90418132, + "step": 1443 + }, + { + "epoch": 4.805324459234609, + "grad_norm": 6.967924118041992, + "learning_rate": 5e-06, + "loss": 0.5027, + "num_input_tokens_seen": 90481356, + "step": 1444 + }, + { + "epoch": 4.805324459234609, + "loss": 0.3537171483039856, + "loss_ce": 7.945661491248757e-05, + "loss_iou": 0.10693359375, + "loss_num": 0.0279541015625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 90481356, + "step": 1444 + }, + { + "epoch": 4.80865224625624, + "grad_norm": 24.378341674804688, + "learning_rate": 5e-06, + "loss": 0.7154, + "num_input_tokens_seen": 90544404, + "step": 1445 + }, + { + "epoch": 4.80865224625624, + "loss": 0.7716976404190063, + "loss_ce": 0.00045743631199002266, + "loss_iou": 0.28125, + "loss_num": 0.041748046875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 90544404, + "step": 1445 + }, + { + "epoch": 4.81198003327787, + "grad_norm": 33.42325973510742, + "learning_rate": 5e-06, + "loss": 0.7386, + "num_input_tokens_seen": 90607728, + "step": 1446 + }, + { + "epoch": 4.81198003327787, + "loss": 0.9378975033760071, + "loss_ce": 3.1257190130418167e-05, + "loss_iou": 0.3125, + "loss_num": 0.0625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 90607728, + "step": 1446 + }, + { + "epoch": 4.815307820299501, + "grad_norm": 10.282571792602539, + "learning_rate": 5e-06, + "loss": 0.6003, + "num_input_tokens_seen": 90670312, + "step": 1447 + }, + { + "epoch": 4.815307820299501, + "loss": 0.44258010387420654, + "loss_ce": 7.520718645537272e-05, + "loss_iou": 0.126953125, + "loss_num": 0.03759765625, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 90670312, + "step": 1447 + }, + { + "epoch": 4.818635607321132, + "grad_norm": 9.647857666015625, + "learning_rate": 5e-06, + "loss": 0.8134, + "num_input_tokens_seen": 90734576, + "step": 1448 + }, + { + "epoch": 4.818635607321132, + "loss": 0.7873010635375977, + "loss_ce": 0.0010461233323439956, + "loss_iou": 0.287109375, + "loss_num": 0.04248046875, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 90734576, + "step": 1448 + }, + { + "epoch": 4.821963394342762, + "grad_norm": 16.907331466674805, + "learning_rate": 5e-06, + "loss": 0.669, + "num_input_tokens_seen": 90797412, + "step": 1449 + }, + { + "epoch": 4.821963394342762, + "loss": 0.623419463634491, + "loss_ce": 6.367723472067155e-06, + "loss_iou": 0.23828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.625, + "num_input_tokens_seen": 90797412, + "step": 1449 + }, + { + "epoch": 4.825291181364393, + "grad_norm": 24.30735969543457, + "learning_rate": 5e-06, + "loss": 0.756, + "num_input_tokens_seen": 90858524, + "step": 1450 + }, + { + "epoch": 4.825291181364393, + "loss": 0.6738710999488831, + "loss_ce": 0.0005312650464475155, + "loss_iou": 0.2138671875, + "loss_num": 0.049072265625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 90858524, + "step": 1450 + }, + { + "epoch": 4.8286189683860234, + "grad_norm": 11.708526611328125, + "learning_rate": 5e-06, + "loss": 0.5307, + "num_input_tokens_seen": 90921152, + "step": 1451 + }, + { + "epoch": 4.8286189683860234, + "loss": 0.584744393825531, + "loss_ce": 0.000760018068831414, + "loss_iou": 0.16796875, + "loss_num": 0.0498046875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 90921152, + "step": 1451 + }, + { + "epoch": 4.831946755407654, + "grad_norm": 22.66852378845215, + "learning_rate": 5e-06, + "loss": 0.806, + "num_input_tokens_seen": 90984628, + "step": 1452 + }, + { + "epoch": 4.831946755407654, + "loss": 0.6450674533843994, + "loss_ce": 0.0005361848743632436, + "loss_iou": 0.2412109375, + "loss_num": 0.032470703125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 90984628, + "step": 1452 + }, + { + "epoch": 4.835274542429285, + "grad_norm": 34.99443817138672, + "learning_rate": 5e-06, + "loss": 0.5531, + "num_input_tokens_seen": 91045592, + "step": 1453 + }, + { + "epoch": 4.835274542429285, + "loss": 0.597947895526886, + "loss_ce": 4.751010055770166e-05, + "loss_iou": 0.228515625, + "loss_num": 0.0281982421875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 91045592, + "step": 1453 + }, + { + "epoch": 4.838602329450915, + "grad_norm": 31.513866424560547, + "learning_rate": 5e-06, + "loss": 0.7581, + "num_input_tokens_seen": 91108276, + "step": 1454 + }, + { + "epoch": 4.838602329450915, + "loss": 0.5201447010040283, + "loss_ce": 3.078287591051776e-06, + "loss_iou": 0.1455078125, + "loss_num": 0.0458984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 91108276, + "step": 1454 + }, + { + "epoch": 4.841930116472546, + "grad_norm": 26.065690994262695, + "learning_rate": 5e-06, + "loss": 0.6533, + "num_input_tokens_seen": 91171596, + "step": 1455 + }, + { + "epoch": 4.841930116472546, + "loss": 0.7839912176132202, + "loss_ce": 0.0002998454438056797, + "loss_iou": 0.2734375, + "loss_num": 0.04736328125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 91171596, + "step": 1455 + }, + { + "epoch": 4.8452579034941765, + "grad_norm": 9.963427543640137, + "learning_rate": 5e-06, + "loss": 0.9933, + "num_input_tokens_seen": 91234328, + "step": 1456 + }, + { + "epoch": 4.8452579034941765, + "loss": 1.1222238540649414, + "loss_ce": 0.0006418825942091644, + "loss_iou": 0.384765625, + "loss_num": 0.0703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 91234328, + "step": 1456 + }, + { + "epoch": 4.848585690515807, + "grad_norm": 17.40574836730957, + "learning_rate": 5e-06, + "loss": 0.7954, + "num_input_tokens_seen": 91298288, + "step": 1457 + }, + { + "epoch": 4.848585690515807, + "loss": 0.5493208765983582, + "loss_ce": 4.492412699619308e-06, + "loss_iou": 0.21875, + "loss_num": 0.022216796875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 91298288, + "step": 1457 + }, + { + "epoch": 4.851913477537438, + "grad_norm": 36.004634857177734, + "learning_rate": 5e-06, + "loss": 0.507, + "num_input_tokens_seen": 91360532, + "step": 1458 + }, + { + "epoch": 4.851913477537438, + "loss": 0.29166799783706665, + "loss_ce": 0.0007134195184335113, + "loss_iou": 0.0390625, + "loss_num": 0.04248046875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 91360532, + "step": 1458 + }, + { + "epoch": 4.855241264559068, + "grad_norm": 24.00579071044922, + "learning_rate": 5e-06, + "loss": 0.6614, + "num_input_tokens_seen": 91423224, + "step": 1459 + }, + { + "epoch": 4.855241264559068, + "loss": 0.8000708222389221, + "loss_ce": 0.0012427503243088722, + "loss_iou": 0.2470703125, + "loss_num": 0.061279296875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 91423224, + "step": 1459 + }, + { + "epoch": 4.858569051580699, + "grad_norm": 7.054749011993408, + "learning_rate": 5e-06, + "loss": 0.7916, + "num_input_tokens_seen": 91486976, + "step": 1460 + }, + { + "epoch": 4.858569051580699, + "loss": 0.6811645030975342, + "loss_ce": 0.00037837924901396036, + "loss_iou": 0.2197265625, + "loss_num": 0.048095703125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 91486976, + "step": 1460 + }, + { + "epoch": 4.86189683860233, + "grad_norm": 15.921218872070312, + "learning_rate": 5e-06, + "loss": 0.8073, + "num_input_tokens_seen": 91549948, + "step": 1461 + }, + { + "epoch": 4.86189683860233, + "loss": 1.1966166496276855, + "loss_ce": 0.0005717898602597415, + "loss_iou": 0.41015625, + "loss_num": 0.07568359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 91549948, + "step": 1461 + }, + { + "epoch": 4.86522462562396, + "grad_norm": 15.619834899902344, + "learning_rate": 5e-06, + "loss": 0.7384, + "num_input_tokens_seen": 91612528, + "step": 1462 + }, + { + "epoch": 4.86522462562396, + "loss": 0.8267630338668823, + "loss_ce": 0.0005911277839913964, + "loss_iou": 0.28515625, + "loss_num": 0.051513671875, + "loss_xval": 0.828125, + "num_input_tokens_seen": 91612528, + "step": 1462 + }, + { + "epoch": 4.868552412645591, + "grad_norm": 10.34393310546875, + "learning_rate": 5e-06, + "loss": 0.7152, + "num_input_tokens_seen": 91674264, + "step": 1463 + }, + { + "epoch": 4.868552412645591, + "loss": 0.6649792790412903, + "loss_ce": 0.0007947350386530161, + "loss_iou": 0.2333984375, + "loss_num": 0.039306640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 91674264, + "step": 1463 + }, + { + "epoch": 4.871880199667221, + "grad_norm": 8.09216022491455, + "learning_rate": 5e-06, + "loss": 0.471, + "num_input_tokens_seen": 91737116, + "step": 1464 + }, + { + "epoch": 4.871880199667221, + "loss": 0.45063310861587524, + "loss_ce": 0.00019367330241948366, + "loss_iou": 0.1611328125, + "loss_num": 0.025634765625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 91737116, + "step": 1464 + }, + { + "epoch": 4.875207986688852, + "grad_norm": 17.690427780151367, + "learning_rate": 5e-06, + "loss": 0.7025, + "num_input_tokens_seen": 91799080, + "step": 1465 + }, + { + "epoch": 4.875207986688852, + "loss": 0.6187844276428223, + "loss_ce": 1.0004376235883683e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.034423828125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 91799080, + "step": 1465 + }, + { + "epoch": 4.878535773710483, + "grad_norm": 19.25313377380371, + "learning_rate": 5e-06, + "loss": 0.6483, + "num_input_tokens_seen": 91860988, + "step": 1466 + }, + { + "epoch": 4.878535773710483, + "loss": 0.559953510761261, + "loss_ce": 0.00020009189029224217, + "loss_iou": 0.15234375, + "loss_num": 0.051025390625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 91860988, + "step": 1466 + }, + { + "epoch": 4.881863560732113, + "grad_norm": 10.655545234680176, + "learning_rate": 5e-06, + "loss": 0.7747, + "num_input_tokens_seen": 91924968, + "step": 1467 + }, + { + "epoch": 4.881863560732113, + "loss": 0.636091947555542, + "loss_ce": 0.00034974643494933844, + "loss_iou": 0.26171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 91924968, + "step": 1467 + }, + { + "epoch": 4.885191347753744, + "grad_norm": 20.099782943725586, + "learning_rate": 5e-06, + "loss": 0.7424, + "num_input_tokens_seen": 91988252, + "step": 1468 + }, + { + "epoch": 4.885191347753744, + "loss": 0.5978015661239624, + "loss_ce": 0.0008777122711762786, + "loss_iou": 0.2265625, + "loss_num": 0.028564453125, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 91988252, + "step": 1468 + }, + { + "epoch": 4.8885191347753745, + "grad_norm": 17.030847549438477, + "learning_rate": 5e-06, + "loss": 0.6069, + "num_input_tokens_seen": 92049740, + "step": 1469 + }, + { + "epoch": 4.8885191347753745, + "loss": 0.7965136766433716, + "loss_ce": 4.913673365081195e-06, + "loss_iou": 0.24609375, + "loss_num": 0.060791015625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 92049740, + "step": 1469 + }, + { + "epoch": 4.891846921797005, + "grad_norm": 17.971389770507812, + "learning_rate": 5e-06, + "loss": 0.7334, + "num_input_tokens_seen": 92112536, + "step": 1470 + }, + { + "epoch": 4.891846921797005, + "loss": 0.6343286037445068, + "loss_ce": 0.0005395347252488136, + "loss_iou": 0.1923828125, + "loss_num": 0.050048828125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 92112536, + "step": 1470 + }, + { + "epoch": 4.895174708818636, + "grad_norm": 23.792451858520508, + "learning_rate": 5e-06, + "loss": 0.8228, + "num_input_tokens_seen": 92176628, + "step": 1471 + }, + { + "epoch": 4.895174708818636, + "loss": 0.6928781270980835, + "loss_ce": 7.047529834380839e-06, + "loss_iou": 0.263671875, + "loss_num": 0.033447265625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 92176628, + "step": 1471 + }, + { + "epoch": 4.898502495840266, + "grad_norm": 10.210338592529297, + "learning_rate": 5e-06, + "loss": 0.5654, + "num_input_tokens_seen": 92238496, + "step": 1472 + }, + { + "epoch": 4.898502495840266, + "loss": 0.5062616467475891, + "loss_ce": 0.0018671302823349833, + "loss_iou": 0.16796875, + "loss_num": 0.033935546875, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 92238496, + "step": 1472 + }, + { + "epoch": 4.901830282861897, + "grad_norm": 27.42046546936035, + "learning_rate": 5e-06, + "loss": 0.7748, + "num_input_tokens_seen": 92301304, + "step": 1473 + }, + { + "epoch": 4.901830282861897, + "loss": 0.6527865529060364, + "loss_ce": 0.00019865072681568563, + "loss_iou": 0.2099609375, + "loss_num": 0.046630859375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 92301304, + "step": 1473 + }, + { + "epoch": 4.9051580698835275, + "grad_norm": 24.284852981567383, + "learning_rate": 5e-06, + "loss": 0.6917, + "num_input_tokens_seen": 92363496, + "step": 1474 + }, + { + "epoch": 4.9051580698835275, + "loss": 0.5194106101989746, + "loss_ce": 0.00018454447854310274, + "loss_iou": 0.158203125, + "loss_num": 0.040283203125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 92363496, + "step": 1474 + }, + { + "epoch": 4.908485856905158, + "grad_norm": 9.942500114440918, + "learning_rate": 5e-06, + "loss": 0.5869, + "num_input_tokens_seen": 92426528, + "step": 1475 + }, + { + "epoch": 4.908485856905158, + "loss": 0.43600332736968994, + "loss_ce": 0.0009447159245610237, + "loss_iou": 0.0732421875, + "loss_num": 0.0576171875, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 92426528, + "step": 1475 + }, + { + "epoch": 4.911813643926789, + "grad_norm": 12.120987892150879, + "learning_rate": 5e-06, + "loss": 0.5949, + "num_input_tokens_seen": 92488064, + "step": 1476 + }, + { + "epoch": 4.911813643926789, + "loss": 0.6362853050231934, + "loss_ce": 0.0012755857314914465, + "loss_iou": 0.2216796875, + "loss_num": 0.0380859375, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 92488064, + "step": 1476 + }, + { + "epoch": 4.915141430948419, + "grad_norm": 21.779373168945312, + "learning_rate": 5e-06, + "loss": 0.5642, + "num_input_tokens_seen": 92550804, + "step": 1477 + }, + { + "epoch": 4.915141430948419, + "loss": 0.5219756960868835, + "loss_ce": 3.0526402952091303e-06, + "loss_iou": 0.2099609375, + "loss_num": 0.0205078125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 92550804, + "step": 1477 + }, + { + "epoch": 4.91846921797005, + "grad_norm": 41.83907699584961, + "learning_rate": 5e-06, + "loss": 0.8394, + "num_input_tokens_seen": 92614052, + "step": 1478 + }, + { + "epoch": 4.91846921797005, + "loss": 0.7585424780845642, + "loss_ce": 0.0014623773749917746, + "loss_iou": 0.306640625, + "loss_num": 0.0286865234375, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 92614052, + "step": 1478 + }, + { + "epoch": 4.921797004991681, + "grad_norm": 23.484262466430664, + "learning_rate": 5e-06, + "loss": 0.7083, + "num_input_tokens_seen": 92676992, + "step": 1479 + }, + { + "epoch": 4.921797004991681, + "loss": 0.45134830474853516, + "loss_ce": 0.000298495520837605, + "loss_iou": 0.15625, + "loss_num": 0.0277099609375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 92676992, + "step": 1479 + }, + { + "epoch": 4.925124792013311, + "grad_norm": 16.01560401916504, + "learning_rate": 5e-06, + "loss": 0.7104, + "num_input_tokens_seen": 92739476, + "step": 1480 + }, + { + "epoch": 4.925124792013311, + "loss": 0.7750359773635864, + "loss_ce": 1.1565551176317967e-05, + "loss_iou": 0.2890625, + "loss_num": 0.039794921875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 92739476, + "step": 1480 + }, + { + "epoch": 4.928452579034942, + "grad_norm": 14.046684265136719, + "learning_rate": 5e-06, + "loss": 0.5505, + "num_input_tokens_seen": 92801844, + "step": 1481 + }, + { + "epoch": 4.928452579034942, + "loss": 0.6174169778823853, + "loss_ce": 0.000961879501119256, + "loss_iou": 0.1796875, + "loss_num": 0.051513671875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 92801844, + "step": 1481 + }, + { + "epoch": 4.931780366056572, + "grad_norm": 24.918357849121094, + "learning_rate": 5e-06, + "loss": 0.6422, + "num_input_tokens_seen": 92864440, + "step": 1482 + }, + { + "epoch": 4.931780366056572, + "loss": 0.5476626753807068, + "loss_ce": 0.003900481853634119, + "loss_iou": 0.1337890625, + "loss_num": 0.055419921875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 92864440, + "step": 1482 + }, + { + "epoch": 4.935108153078203, + "grad_norm": 12.272085189819336, + "learning_rate": 5e-06, + "loss": 0.5356, + "num_input_tokens_seen": 92926160, + "step": 1483 + }, + { + "epoch": 4.935108153078203, + "loss": 0.41046756505966187, + "loss_ce": 0.0007995814085006714, + "loss_iou": 0.1298828125, + "loss_num": 0.0299072265625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 92926160, + "step": 1483 + }, + { + "epoch": 4.938435940099834, + "grad_norm": 14.432737350463867, + "learning_rate": 5e-06, + "loss": 0.8823, + "num_input_tokens_seen": 92990268, + "step": 1484 + }, + { + "epoch": 4.938435940099834, + "loss": 0.904925525188446, + "loss_ce": 0.0006286612479016185, + "loss_iou": 0.388671875, + "loss_num": 0.02587890625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 92990268, + "step": 1484 + }, + { + "epoch": 4.941763727121464, + "grad_norm": 28.021799087524414, + "learning_rate": 5e-06, + "loss": 0.6162, + "num_input_tokens_seen": 93053936, + "step": 1485 + }, + { + "epoch": 4.941763727121464, + "loss": 0.40271496772766113, + "loss_ce": 5.004140803066548e-06, + "loss_iou": 0.1552734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 93053936, + "step": 1485 + }, + { + "epoch": 4.945091514143095, + "grad_norm": 36.70539093017578, + "learning_rate": 5e-06, + "loss": 0.7026, + "num_input_tokens_seen": 93117408, + "step": 1486 + }, + { + "epoch": 4.945091514143095, + "loss": 0.7068368196487427, + "loss_ce": 0.0009041887824423611, + "loss_iou": 0.21484375, + "loss_num": 0.05517578125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 93117408, + "step": 1486 + }, + { + "epoch": 4.9484193011647255, + "grad_norm": 34.976402282714844, + "learning_rate": 5e-06, + "loss": 0.6626, + "num_input_tokens_seen": 93179780, + "step": 1487 + }, + { + "epoch": 4.9484193011647255, + "loss": 0.5312550067901611, + "loss_ce": 5.038962626713328e-06, + "loss_iou": 0.1689453125, + "loss_num": 0.03857421875, + "loss_xval": 0.53125, + "num_input_tokens_seen": 93179780, + "step": 1487 + }, + { + "epoch": 4.951747088186356, + "grad_norm": 14.874862670898438, + "learning_rate": 5e-06, + "loss": 0.583, + "num_input_tokens_seen": 93242380, + "step": 1488 + }, + { + "epoch": 4.951747088186356, + "loss": 0.5796291828155518, + "loss_ce": 3.933107655029744e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.0380859375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 93242380, + "step": 1488 + }, + { + "epoch": 4.955074875207987, + "grad_norm": 14.622214317321777, + "learning_rate": 5e-06, + "loss": 0.5826, + "num_input_tokens_seen": 93305320, + "step": 1489 + }, + { + "epoch": 4.955074875207987, + "loss": 0.746558666229248, + "loss_ce": 3.767916496144608e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0478515625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 93305320, + "step": 1489 + }, + { + "epoch": 4.958402662229617, + "grad_norm": 20.690074920654297, + "learning_rate": 5e-06, + "loss": 0.8156, + "num_input_tokens_seen": 93367680, + "step": 1490 + }, + { + "epoch": 4.958402662229617, + "loss": 0.8623336553573608, + "loss_ce": 0.0005172833916731179, + "loss_iou": 0.28125, + "loss_num": 0.060302734375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 93367680, + "step": 1490 + }, + { + "epoch": 4.961730449251248, + "grad_norm": 19.866193771362305, + "learning_rate": 5e-06, + "loss": 0.6329, + "num_input_tokens_seen": 93429344, + "step": 1491 + }, + { + "epoch": 4.961730449251248, + "loss": 0.5215956568717957, + "loss_ce": 0.00011129368795081973, + "loss_iou": 0.15234375, + "loss_num": 0.04345703125, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 93429344, + "step": 1491 + }, + { + "epoch": 4.965058236272879, + "grad_norm": 14.48011589050293, + "learning_rate": 5e-06, + "loss": 0.9734, + "num_input_tokens_seen": 93493372, + "step": 1492 + }, + { + "epoch": 4.965058236272879, + "loss": 0.8466031551361084, + "loss_ce": 0.0009000533609651029, + "loss_iou": 0.275390625, + "loss_num": 0.058837890625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 93493372, + "step": 1492 + }, + { + "epoch": 4.968386023294509, + "grad_norm": 12.389137268066406, + "learning_rate": 5e-06, + "loss": 0.8139, + "num_input_tokens_seen": 93556592, + "step": 1493 + }, + { + "epoch": 4.968386023294509, + "loss": 0.672704815864563, + "loss_ce": 0.0007077268091961741, + "loss_iou": 0.21875, + "loss_num": 0.046875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 93556592, + "step": 1493 + }, + { + "epoch": 4.97171381031614, + "grad_norm": 14.919951438903809, + "learning_rate": 5e-06, + "loss": 0.6283, + "num_input_tokens_seen": 93619256, + "step": 1494 + }, + { + "epoch": 4.97171381031614, + "loss": 0.4168055057525635, + "loss_ce": 0.00017951334302779287, + "loss_iou": 0.1123046875, + "loss_num": 0.038330078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 93619256, + "step": 1494 + }, + { + "epoch": 4.97504159733777, + "grad_norm": 34.406837463378906, + "learning_rate": 5e-06, + "loss": 0.7431, + "num_input_tokens_seen": 93681420, + "step": 1495 + }, + { + "epoch": 4.97504159733777, + "loss": 0.7975531220436096, + "loss_ce": 0.0037908926606178284, + "loss_iou": 0.279296875, + "loss_num": 0.046875, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 93681420, + "step": 1495 + }, + { + "epoch": 4.978369384359401, + "grad_norm": 16.960229873657227, + "learning_rate": 5e-06, + "loss": 0.8396, + "num_input_tokens_seen": 93744272, + "step": 1496 + }, + { + "epoch": 4.978369384359401, + "loss": 1.021630048751831, + "loss_ce": 0.00038986955769360065, + "loss_iou": 0.3984375, + "loss_num": 0.044677734375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 93744272, + "step": 1496 + }, + { + "epoch": 4.981697171381032, + "grad_norm": 12.859408378601074, + "learning_rate": 5e-06, + "loss": 0.7763, + "num_input_tokens_seen": 93806256, + "step": 1497 + }, + { + "epoch": 4.981697171381032, + "loss": 0.5506644248962402, + "loss_ce": 0.001958362990990281, + "loss_iou": 0.1806640625, + "loss_num": 0.037841796875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 93806256, + "step": 1497 + }, + { + "epoch": 4.985024958402662, + "grad_norm": 31.596189498901367, + "learning_rate": 5e-06, + "loss": 0.69, + "num_input_tokens_seen": 93869616, + "step": 1498 + }, + { + "epoch": 4.985024958402662, + "loss": 0.746276319026947, + "loss_ce": 0.00018256355542689562, + "loss_iou": 0.267578125, + "loss_num": 0.042236328125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 93869616, + "step": 1498 + }, + { + "epoch": 4.988352745424293, + "grad_norm": 24.65460777282715, + "learning_rate": 5e-06, + "loss": 0.7051, + "num_input_tokens_seen": 93931152, + "step": 1499 + }, + { + "epoch": 4.988352745424293, + "loss": 0.7398415207862854, + "loss_ce": 0.00033956102561205626, + "loss_iou": 0.26953125, + "loss_num": 0.0400390625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 93931152, + "step": 1499 + }, + { + "epoch": 4.9916805324459235, + "grad_norm": 10.513225555419922, + "learning_rate": 5e-06, + "loss": 0.5617, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "eval_seeclick_CIoU": 0.12539703398942947, + "eval_seeclick_GIoU": 0.1438598707318306, + "eval_seeclick_IoU": 0.22447702288627625, + "eval_seeclick_MAE_all": 0.1706928089261055, + "eval_seeclick_MAE_h": 0.05414240434765816, + "eval_seeclick_MAE_w": 0.1219484768807888, + "eval_seeclick_MAE_x_boxes": 0.24619252979755402, + "eval_seeclick_MAE_y_boxes": 0.1315431222319603, + "eval_seeclick_NUM_probability": 0.9974748492240906, + "eval_seeclick_inside_bbox": 0.30520834028720856, + "eval_seeclick_loss": 2.665666341781616, + "eval_seeclick_loss_ce": 0.10174492001533508, + "eval_seeclick_loss_iou": 0.855712890625, + "eval_seeclick_loss_num": 0.16909027099609375, + "eval_seeclick_loss_xval": 2.556640625, + "eval_seeclick_runtime": 65.5562, + "eval_seeclick_samples_per_second": 0.717, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "eval_icons_CIoU": 0.04876134404912591, + "eval_icons_GIoU": 0.15376271307468414, + "eval_icons_IoU": 0.20101643353700638, + "eval_icons_MAE_all": 0.13954433798789978, + "eval_icons_MAE_h": 0.061773356050252914, + "eval_icons_MAE_w": 0.15250807255506516, + "eval_icons_MAE_x_boxes": 0.1526419073343277, + "eval_icons_MAE_y_boxes": 0.03896789811551571, + "eval_icons_NUM_probability": 0.9999941885471344, + "eval_icons_inside_bbox": 0.3263888955116272, + "eval_icons_loss": 2.3821887969970703, + "eval_icons_loss_ce": 1.977284341592167e-06, + "eval_icons_loss_iou": 0.833984375, + "eval_icons_loss_num": 0.14191246032714844, + "eval_icons_loss_xval": 2.3779296875, + "eval_icons_runtime": 76.6569, + "eval_icons_samples_per_second": 0.652, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "eval_screenspot_CIoU": 0.05785238494475683, + "eval_screenspot_GIoU": 0.11830167099833488, + "eval_screenspot_IoU": 0.20486934979756674, + "eval_screenspot_MAE_all": 0.1998411019643148, + "eval_screenspot_MAE_h": 0.07882813364267349, + "eval_screenspot_MAE_w": 0.1679569110274315, + "eval_screenspot_MAE_x_boxes": 0.26309264699618023, + "eval_screenspot_MAE_y_boxes": 0.14158939321835837, + "eval_screenspot_NUM_probability": 0.999947190284729, + "eval_screenspot_inside_bbox": 0.34166666865348816, + "eval_screenspot_loss": 2.7864136695861816, + "eval_screenspot_loss_ce": 7.061872323295877e-05, + "eval_screenspot_loss_iou": 0.8956705729166666, + "eval_screenspot_loss_num": 0.204803466796875, + "eval_screenspot_loss_xval": 2.8151041666666665, + "eval_screenspot_runtime": 118.0213, + "eval_screenspot_samples_per_second": 0.754, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "eval_compot_CIoU": -0.00016517378389835358, + "eval_compot_GIoU": 0.10418009385466576, + "eval_compot_IoU": 0.16831143200397491, + "eval_compot_MAE_all": 0.18598458915948868, + "eval_compot_MAE_h": 0.07219501584768295, + "eval_compot_MAE_w": 0.24491457641124725, + "eval_compot_MAE_x_boxes": 0.182175874710083, + "eval_compot_MAE_y_boxes": 0.06786558777093887, + "eval_compot_NUM_probability": 0.9999881684780121, + "eval_compot_inside_bbox": 0.2986111119389534, + "eval_compot_loss": 2.7441301345825195, + "eval_compot_loss_ce": 0.004642534884624183, + "eval_compot_loss_iou": 0.90576171875, + "eval_compot_loss_num": 0.1902923583984375, + "eval_compot_loss_xval": 2.7626953125, + "eval_compot_runtime": 67.269, + "eval_compot_samples_per_second": 0.743, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "eval_custom_ui_MAE_all": 0.07861483469605446, + "eval_custom_ui_MAE_x": 0.07045421935617924, + "eval_custom_ui_MAE_y": 0.08677546679973602, + "eval_custom_ui_NUM_probability": 0.9999936819076538, + "eval_custom_ui_loss": 0.37946105003356934, + "eval_custom_ui_loss_ce": 9.375693934998708e-06, + "eval_custom_ui_loss_num": 0.0741729736328125, + "eval_custom_ui_loss_xval": 0.37078857421875, + "eval_custom_ui_runtime": 51.6085, + "eval_custom_ui_samples_per_second": 0.969, + "eval_custom_ui_steps_per_second": 0.039, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.9916805324459235, + "loss": 0.3743995428085327, + "loss_ce": 9.905661499942653e-06, + "loss_iou": 0.0, + "loss_num": 0.07470703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 93994508, + "step": 1500 + }, + { + "epoch": 4.995008319467554, + "grad_norm": 24.133996963500977, + "learning_rate": 5e-06, + "loss": 0.5173, + "num_input_tokens_seen": 94056528, + "step": 1501 + }, + { + "epoch": 4.995008319467554, + "loss": 0.4280321002006531, + "loss_ce": 5.3581257816404104e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.03271484375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 94056528, + "step": 1501 + }, + { + "epoch": 4.998336106489185, + "grad_norm": 13.59617805480957, + "learning_rate": 5e-06, + "loss": 0.7287, + "num_input_tokens_seen": 94120456, + "step": 1502 + }, + { + "epoch": 4.998336106489185, + "loss": 0.8299544453620911, + "loss_ce": 0.0006087642977945507, + "loss_iou": 0.2734375, + "loss_num": 0.05615234375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 94120456, + "step": 1502 + }, + { + "epoch": 4.998336106489185, + "loss": 0.8235313296318054, + "loss_ce": 4.499550414038822e-05, + "loss_iou": 0.27734375, + "loss_num": 0.05419921875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 94152608, + "step": 1502 + }, + { + "epoch": 5.001663893510815, + "grad_norm": 14.112279891967773, + "learning_rate": 5e-06, + "loss": 0.7359, + "num_input_tokens_seen": 94184688, + "step": 1503 + }, + { + "epoch": 5.001663893510815, + "loss": 0.6481797695159912, + "loss_ce": 0.0002305309899384156, + "loss_iou": 0.251953125, + "loss_num": 0.028564453125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 94184688, + "step": 1503 + }, + { + "epoch": 5.004991680532446, + "grad_norm": 26.775850296020508, + "learning_rate": 5e-06, + "loss": 0.8273, + "num_input_tokens_seen": 94250036, + "step": 1504 + }, + { + "epoch": 5.004991680532446, + "loss": 0.9145175814628601, + "loss_ce": 0.0004551278834696859, + "loss_iou": 0.3515625, + "loss_num": 0.042236328125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 94250036, + "step": 1504 + }, + { + "epoch": 5.0083194675540765, + "grad_norm": 11.916851997375488, + "learning_rate": 5e-06, + "loss": 0.6772, + "num_input_tokens_seen": 94313792, + "step": 1505 + }, + { + "epoch": 5.0083194675540765, + "loss": 0.7098668217658997, + "loss_ce": 0.0008824262768030167, + "loss_iou": 0.255859375, + "loss_num": 0.03955078125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 94313792, + "step": 1505 + }, + { + "epoch": 5.011647254575707, + "grad_norm": 8.042437553405762, + "learning_rate": 5e-06, + "loss": 0.7836, + "num_input_tokens_seen": 94375952, + "step": 1506 + }, + { + "epoch": 5.011647254575707, + "loss": 0.8639097809791565, + "loss_ce": 0.000384394807042554, + "loss_iou": 0.2734375, + "loss_num": 0.06396484375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 94375952, + "step": 1506 + }, + { + "epoch": 5.014975041597338, + "grad_norm": 15.622085571289062, + "learning_rate": 5e-06, + "loss": 0.8583, + "num_input_tokens_seen": 94440520, + "step": 1507 + }, + { + "epoch": 5.014975041597338, + "loss": 1.238585114479065, + "loss_ce": 0.0032335498835891485, + "loss_iou": 0.46484375, + "loss_num": 0.06103515625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 94440520, + "step": 1507 + }, + { + "epoch": 5.018302828618968, + "grad_norm": 31.38747787475586, + "learning_rate": 5e-06, + "loss": 0.7085, + "num_input_tokens_seen": 94500620, + "step": 1508 + }, + { + "epoch": 5.018302828618968, + "loss": 0.5411911010742188, + "loss_ce": 0.00041960144881159067, + "loss_iou": 0.1875, + "loss_num": 0.033203125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 94500620, + "step": 1508 + }, + { + "epoch": 5.021630615640599, + "grad_norm": 24.40467643737793, + "learning_rate": 5e-06, + "loss": 0.755, + "num_input_tokens_seen": 94563172, + "step": 1509 + }, + { + "epoch": 5.021630615640599, + "loss": 0.886728048324585, + "loss_ce": 0.0011079427786171436, + "loss_iou": 0.29296875, + "loss_num": 0.059814453125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 94563172, + "step": 1509 + }, + { + "epoch": 5.02495840266223, + "grad_norm": 28.08826446533203, + "learning_rate": 5e-06, + "loss": 0.7189, + "num_input_tokens_seen": 94626456, + "step": 1510 + }, + { + "epoch": 5.02495840266223, + "loss": 0.5464110374450684, + "loss_ce": 0.0007567618158645928, + "loss_iou": 0.20703125, + "loss_num": 0.0262451171875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 94626456, + "step": 1510 + }, + { + "epoch": 5.02828618968386, + "grad_norm": 32.060081481933594, + "learning_rate": 5e-06, + "loss": 0.6385, + "num_input_tokens_seen": 94689628, + "step": 1511 + }, + { + "epoch": 5.02828618968386, + "loss": 0.6361727714538574, + "loss_ce": 0.00018641223141457886, + "loss_iou": 0.2275390625, + "loss_num": 0.0361328125, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 94689628, + "step": 1511 + }, + { + "epoch": 5.031613976705491, + "grad_norm": 28.850717544555664, + "learning_rate": 5e-06, + "loss": 0.6486, + "num_input_tokens_seen": 94752952, + "step": 1512 + }, + { + "epoch": 5.031613976705491, + "loss": 0.6386792659759521, + "loss_ce": 7.388218818960013e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.032958984375, + "loss_xval": 0.640625, + "num_input_tokens_seen": 94752952, + "step": 1512 + }, + { + "epoch": 5.034941763727121, + "grad_norm": 26.582441329956055, + "learning_rate": 5e-06, + "loss": 0.7219, + "num_input_tokens_seen": 94816220, + "step": 1513 + }, + { + "epoch": 5.034941763727121, + "loss": 0.8012294173240662, + "loss_ce": 0.0014247273793444037, + "loss_iou": 0.28515625, + "loss_num": 0.046142578125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 94816220, + "step": 1513 + }, + { + "epoch": 5.038269550748752, + "grad_norm": 13.041583061218262, + "learning_rate": 5e-06, + "loss": 0.7812, + "num_input_tokens_seen": 94878556, + "step": 1514 + }, + { + "epoch": 5.038269550748752, + "loss": 0.8982586860656738, + "loss_ce": 6.535501597682014e-05, + "loss_iou": 0.26953125, + "loss_num": 0.07177734375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 94878556, + "step": 1514 + }, + { + "epoch": 5.041597337770383, + "grad_norm": 41.85260009765625, + "learning_rate": 5e-06, + "loss": 0.6035, + "num_input_tokens_seen": 94940876, + "step": 1515 + }, + { + "epoch": 5.041597337770383, + "loss": 0.6592752933502197, + "loss_ce": 0.001194267300888896, + "loss_iou": 0.26171875, + "loss_num": 0.027099609375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 94940876, + "step": 1515 + }, + { + "epoch": 5.044925124792013, + "grad_norm": 33.620452880859375, + "learning_rate": 5e-06, + "loss": 0.8496, + "num_input_tokens_seen": 95004780, + "step": 1516 + }, + { + "epoch": 5.044925124792013, + "loss": 1.0527219772338867, + "loss_ce": 0.000597875623498112, + "loss_iou": 0.345703125, + "loss_num": 0.07275390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 95004780, + "step": 1516 + }, + { + "epoch": 5.048252911813644, + "grad_norm": 14.673892974853516, + "learning_rate": 5e-06, + "loss": 0.5555, + "num_input_tokens_seen": 95067216, + "step": 1517 + }, + { + "epoch": 5.048252911813644, + "loss": 0.41764336824417114, + "loss_ce": 0.0001018413677229546, + "loss_iou": 0.150390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 95067216, + "step": 1517 + }, + { + "epoch": 5.0515806988352745, + "grad_norm": 10.968241691589355, + "learning_rate": 5e-06, + "loss": 0.6427, + "num_input_tokens_seen": 95129008, + "step": 1518 + }, + { + "epoch": 5.0515806988352745, + "loss": 0.5915021896362305, + "loss_ce": 0.0006818820256739855, + "loss_iou": 0.1943359375, + "loss_num": 0.04052734375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 95129008, + "step": 1518 + }, + { + "epoch": 5.054908485856905, + "grad_norm": 26.52812957763672, + "learning_rate": 5e-06, + "loss": 0.7537, + "num_input_tokens_seen": 95192232, + "step": 1519 + }, + { + "epoch": 5.054908485856905, + "loss": 0.903398871421814, + "loss_ce": 0.0005668269586749375, + "loss_iou": 0.330078125, + "loss_num": 0.048828125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 95192232, + "step": 1519 + }, + { + "epoch": 5.058236272878536, + "grad_norm": 30.482860565185547, + "learning_rate": 5e-06, + "loss": 0.8095, + "num_input_tokens_seen": 95256580, + "step": 1520 + }, + { + "epoch": 5.058236272878536, + "loss": 0.7557247877120972, + "loss_ce": 0.0013303017476573586, + "loss_iou": 0.259765625, + "loss_num": 0.047119140625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 95256580, + "step": 1520 + }, + { + "epoch": 5.061564059900166, + "grad_norm": 39.979736328125, + "learning_rate": 5e-06, + "loss": 0.6808, + "num_input_tokens_seen": 95318464, + "step": 1521 + }, + { + "epoch": 5.061564059900166, + "loss": 0.7826014161109924, + "loss_ce": 8.651560165162664e-06, + "loss_iou": 0.310546875, + "loss_num": 0.03271484375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 95318464, + "step": 1521 + }, + { + "epoch": 5.064891846921797, + "grad_norm": 14.766712188720703, + "learning_rate": 5e-06, + "loss": 0.5086, + "num_input_tokens_seen": 95379756, + "step": 1522 + }, + { + "epoch": 5.064891846921797, + "loss": 0.5393363237380981, + "loss_ce": 0.0009147044620476663, + "loss_iou": 0.20703125, + "loss_num": 0.025146484375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 95379756, + "step": 1522 + }, + { + "epoch": 5.068219633943428, + "grad_norm": 7.251678466796875, + "learning_rate": 5e-06, + "loss": 0.4442, + "num_input_tokens_seen": 95441884, + "step": 1523 + }, + { + "epoch": 5.068219633943428, + "loss": 0.4630827307701111, + "loss_ce": 0.00031416656565852463, + "loss_iou": 0.10546875, + "loss_num": 0.050537109375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 95441884, + "step": 1523 + }, + { + "epoch": 5.071547420965058, + "grad_norm": 6.830024242401123, + "learning_rate": 5e-06, + "loss": 0.961, + "num_input_tokens_seen": 95505028, + "step": 1524 + }, + { + "epoch": 5.071547420965058, + "loss": 0.8202039003372192, + "loss_ce": 0.0006237871712073684, + "loss_iou": 0.298828125, + "loss_num": 0.0439453125, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 95505028, + "step": 1524 + }, + { + "epoch": 5.074875207986689, + "grad_norm": 21.969810485839844, + "learning_rate": 5e-06, + "loss": 1.14, + "num_input_tokens_seen": 95568468, + "step": 1525 + }, + { + "epoch": 5.074875207986689, + "loss": 0.9151379466056824, + "loss_ce": 0.0004040507774334401, + "loss_iou": 0.32421875, + "loss_num": 0.053466796875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 95568468, + "step": 1525 + }, + { + "epoch": 5.078202995008319, + "grad_norm": 57.692718505859375, + "learning_rate": 5e-06, + "loss": 0.642, + "num_input_tokens_seen": 95631624, + "step": 1526 + }, + { + "epoch": 5.078202995008319, + "loss": 0.7038395404815674, + "loss_ce": 0.00010419228055980057, + "loss_iou": 0.25390625, + "loss_num": 0.039306640625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 95631624, + "step": 1526 + }, + { + "epoch": 5.08153078202995, + "grad_norm": 10.670079231262207, + "learning_rate": 5e-06, + "loss": 0.8521, + "num_input_tokens_seen": 95694840, + "step": 1527 + }, + { + "epoch": 5.08153078202995, + "loss": 0.6557031273841858, + "loss_ce": 0.0011621195590123534, + "loss_iou": 0.26171875, + "loss_num": 0.0262451171875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 95694840, + "step": 1527 + }, + { + "epoch": 5.084858569051581, + "grad_norm": 14.62645435333252, + "learning_rate": 5e-06, + "loss": 0.734, + "num_input_tokens_seen": 95757612, + "step": 1528 + }, + { + "epoch": 5.084858569051581, + "loss": 0.6704707145690918, + "loss_ce": 0.0006709391018375754, + "loss_iou": 0.2255859375, + "loss_num": 0.043701171875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 95757612, + "step": 1528 + }, + { + "epoch": 5.088186356073211, + "grad_norm": 16.821863174438477, + "learning_rate": 5e-06, + "loss": 0.9306, + "num_input_tokens_seen": 95821208, + "step": 1529 + }, + { + "epoch": 5.088186356073211, + "loss": 1.184844970703125, + "loss_ce": 3.0552084353985265e-05, + "loss_iou": 0.390625, + "loss_num": 0.0810546875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 95821208, + "step": 1529 + }, + { + "epoch": 5.091514143094842, + "grad_norm": 79.41419982910156, + "learning_rate": 5e-06, + "loss": 0.7217, + "num_input_tokens_seen": 95883876, + "step": 1530 + }, + { + "epoch": 5.091514143094842, + "loss": 0.8382284641265869, + "loss_ce": 0.0009482197929173708, + "loss_iou": 0.333984375, + "loss_num": 0.033447265625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 95883876, + "step": 1530 + }, + { + "epoch": 5.0948419301164725, + "grad_norm": 25.368114471435547, + "learning_rate": 5e-06, + "loss": 0.6514, + "num_input_tokens_seen": 95947076, + "step": 1531 + }, + { + "epoch": 5.0948419301164725, + "loss": 0.7189469337463379, + "loss_ce": 1.380766843794845e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.048828125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 95947076, + "step": 1531 + }, + { + "epoch": 5.098169717138103, + "grad_norm": 13.117644309997559, + "learning_rate": 5e-06, + "loss": 0.8008, + "num_input_tokens_seen": 96009676, + "step": 1532 + }, + { + "epoch": 5.098169717138103, + "loss": 0.8573014736175537, + "loss_ce": 0.00012374338984955102, + "loss_iou": 0.234375, + "loss_num": 0.07763671875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 96009676, + "step": 1532 + }, + { + "epoch": 5.101497504159734, + "grad_norm": 10.874475479125977, + "learning_rate": 5e-06, + "loss": 0.376, + "num_input_tokens_seen": 96071412, + "step": 1533 + }, + { + "epoch": 5.101497504159734, + "loss": 0.4256836473941803, + "loss_ce": 2.4467857656418346e-05, + "loss_iou": 0.154296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 96071412, + "step": 1533 + }, + { + "epoch": 5.104825291181364, + "grad_norm": 15.079007148742676, + "learning_rate": 5e-06, + "loss": 0.5986, + "num_input_tokens_seen": 96133696, + "step": 1534 + }, + { + "epoch": 5.104825291181364, + "loss": 0.5522220134735107, + "loss_ce": 0.001440738094970584, + "loss_iou": 0.1865234375, + "loss_num": 0.03564453125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 96133696, + "step": 1534 + }, + { + "epoch": 5.108153078202995, + "grad_norm": 12.708340644836426, + "learning_rate": 5e-06, + "loss": 0.7919, + "num_input_tokens_seen": 96197404, + "step": 1535 + }, + { + "epoch": 5.108153078202995, + "loss": 0.9537239670753479, + "loss_ce": 0.001331356936134398, + "loss_iou": 0.349609375, + "loss_num": 0.051025390625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 96197404, + "step": 1535 + }, + { + "epoch": 5.1114808652246255, + "grad_norm": 9.230545043945312, + "learning_rate": 5e-06, + "loss": 0.534, + "num_input_tokens_seen": 96260424, + "step": 1536 + }, + { + "epoch": 5.1114808652246255, + "loss": 0.5380446314811707, + "loss_ce": 0.0006911091622896492, + "loss_iou": 0.177734375, + "loss_num": 0.0361328125, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 96260424, + "step": 1536 + }, + { + "epoch": 5.114808652246256, + "grad_norm": 7.485297203063965, + "learning_rate": 5e-06, + "loss": 0.6481, + "num_input_tokens_seen": 96324052, + "step": 1537 + }, + { + "epoch": 5.114808652246256, + "loss": 0.8440206050872803, + "loss_ce": 0.00014855540939606726, + "loss_iou": 0.296875, + "loss_num": 0.05029296875, + "loss_xval": 0.84375, + "num_input_tokens_seen": 96324052, + "step": 1537 + }, + { + "epoch": 5.118136439267887, + "grad_norm": 12.548988342285156, + "learning_rate": 5e-06, + "loss": 0.7396, + "num_input_tokens_seen": 96386140, + "step": 1538 + }, + { + "epoch": 5.118136439267887, + "loss": 0.907487154006958, + "loss_ce": 1.645735392230563e-05, + "loss_iou": 0.30859375, + "loss_num": 0.05810546875, + "loss_xval": 0.90625, + "num_input_tokens_seen": 96386140, + "step": 1538 + }, + { + "epoch": 5.121464226289517, + "grad_norm": 9.125192642211914, + "learning_rate": 5e-06, + "loss": 0.5798, + "num_input_tokens_seen": 96445552, + "step": 1539 + }, + { + "epoch": 5.121464226289517, + "loss": 0.5199623703956604, + "loss_ce": 3.884044417645782e-06, + "loss_iou": 0.1630859375, + "loss_num": 0.0390625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 96445552, + "step": 1539 + }, + { + "epoch": 5.124792013311148, + "grad_norm": 9.516877174377441, + "learning_rate": 5e-06, + "loss": 0.4506, + "num_input_tokens_seen": 96508364, + "step": 1540 + }, + { + "epoch": 5.124792013311148, + "loss": 0.5682762265205383, + "loss_ce": 0.00016097842308226973, + "loss_iou": 0.2353515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 96508364, + "step": 1540 + }, + { + "epoch": 5.128119800332779, + "grad_norm": 13.660243034362793, + "learning_rate": 5e-06, + "loss": 0.7161, + "num_input_tokens_seen": 96571852, + "step": 1541 + }, + { + "epoch": 5.128119800332779, + "loss": 0.8170957565307617, + "loss_ce": 0.002154330490157008, + "loss_iou": 0.279296875, + "loss_num": 0.051025390625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 96571852, + "step": 1541 + }, + { + "epoch": 5.131447587354409, + "grad_norm": 12.244997024536133, + "learning_rate": 5e-06, + "loss": 0.8757, + "num_input_tokens_seen": 96637356, + "step": 1542 + }, + { + "epoch": 5.131447587354409, + "loss": 0.7130774259567261, + "loss_ce": 0.001651703380048275, + "loss_iou": 0.25390625, + "loss_num": 0.040771484375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 96637356, + "step": 1542 + }, + { + "epoch": 5.13477537437604, + "grad_norm": 10.925747871398926, + "learning_rate": 5e-06, + "loss": 0.6949, + "num_input_tokens_seen": 96698348, + "step": 1543 + }, + { + "epoch": 5.13477537437604, + "loss": 0.516825795173645, + "loss_ce": 0.000468335987534374, + "loss_iou": 0.16796875, + "loss_num": 0.035888671875, + "loss_xval": 0.515625, + "num_input_tokens_seen": 96698348, + "step": 1543 + }, + { + "epoch": 5.13810316139767, + "grad_norm": 16.20490074157715, + "learning_rate": 5e-06, + "loss": 0.8087, + "num_input_tokens_seen": 96759328, + "step": 1544 + }, + { + "epoch": 5.13810316139767, + "loss": 0.8088467121124268, + "loss_ce": 8.813009117147885e-06, + "loss_iou": 0.28125, + "loss_num": 0.049560546875, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 96759328, + "step": 1544 + }, + { + "epoch": 5.141430948419301, + "grad_norm": 13.553998947143555, + "learning_rate": 5e-06, + "loss": 0.9186, + "num_input_tokens_seen": 96823196, + "step": 1545 + }, + { + "epoch": 5.141430948419301, + "loss": 0.8833928108215332, + "loss_ce": 0.0019231259357184172, + "loss_iou": 0.3515625, + "loss_num": 0.035400390625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 96823196, + "step": 1545 + }, + { + "epoch": 5.144758735440932, + "grad_norm": 16.64641571044922, + "learning_rate": 5e-06, + "loss": 0.5477, + "num_input_tokens_seen": 96886160, + "step": 1546 + }, + { + "epoch": 5.144758735440932, + "loss": 0.5232193470001221, + "loss_ce": 2.6001296646427363e-05, + "loss_iou": 0.1533203125, + "loss_num": 0.043212890625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 96886160, + "step": 1546 + }, + { + "epoch": 5.148086522462562, + "grad_norm": 12.320746421813965, + "learning_rate": 5e-06, + "loss": 0.8047, + "num_input_tokens_seen": 96949040, + "step": 1547 + }, + { + "epoch": 5.148086522462562, + "loss": 0.7521053552627563, + "loss_ce": 3.014335743500851e-05, + "loss_iou": 0.251953125, + "loss_num": 0.04931640625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 96949040, + "step": 1547 + }, + { + "epoch": 5.151414309484193, + "grad_norm": 8.744440078735352, + "learning_rate": 5e-06, + "loss": 0.8747, + "num_input_tokens_seen": 97011576, + "step": 1548 + }, + { + "epoch": 5.151414309484193, + "loss": 0.8263041973114014, + "loss_ce": 0.0001323087781202048, + "loss_iou": 0.30078125, + "loss_num": 0.045166015625, + "loss_xval": 0.828125, + "num_input_tokens_seen": 97011576, + "step": 1548 + }, + { + "epoch": 5.1547420965058235, + "grad_norm": 8.398408889770508, + "learning_rate": 5e-06, + "loss": 0.6888, + "num_input_tokens_seen": 97073700, + "step": 1549 + }, + { + "epoch": 5.1547420965058235, + "loss": 0.6298340559005737, + "loss_ce": 0.0016601935494691133, + "loss_iou": 0.2109375, + "loss_num": 0.041259765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 97073700, + "step": 1549 + }, + { + "epoch": 5.158069883527454, + "grad_norm": 13.537564277648926, + "learning_rate": 5e-06, + "loss": 0.703, + "num_input_tokens_seen": 97137488, + "step": 1550 + }, + { + "epoch": 5.158069883527454, + "loss": 0.791496992111206, + "loss_ce": 0.0006034679245203733, + "loss_iou": 0.31640625, + "loss_num": 0.03125, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 97137488, + "step": 1550 + }, + { + "epoch": 5.161397670549085, + "grad_norm": 35.76241683959961, + "learning_rate": 5e-06, + "loss": 0.7945, + "num_input_tokens_seen": 97198004, + "step": 1551 + }, + { + "epoch": 5.161397670549085, + "loss": 0.7911405563354492, + "loss_ce": 2.8721469789161347e-06, + "loss_iou": 0.267578125, + "loss_num": 0.05078125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 97198004, + "step": 1551 + }, + { + "epoch": 5.164725457570715, + "grad_norm": 14.66862964630127, + "learning_rate": 5e-06, + "loss": 0.6592, + "num_input_tokens_seen": 97260336, + "step": 1552 + }, + { + "epoch": 5.164725457570715, + "loss": 0.5582370162010193, + "loss_ce": 0.00025361799634993076, + "loss_iou": 0.1865234375, + "loss_num": 0.036865234375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 97260336, + "step": 1552 + }, + { + "epoch": 5.168053244592346, + "grad_norm": 8.499744415283203, + "learning_rate": 5e-06, + "loss": 0.5118, + "num_input_tokens_seen": 97322608, + "step": 1553 + }, + { + "epoch": 5.168053244592346, + "loss": 0.5915481448173523, + "loss_ce": 0.0005447297007776797, + "loss_iou": 0.2265625, + "loss_num": 0.027587890625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 97322608, + "step": 1553 + }, + { + "epoch": 5.1713810316139766, + "grad_norm": 13.193866729736328, + "learning_rate": 5e-06, + "loss": 0.5383, + "num_input_tokens_seen": 97385600, + "step": 1554 + }, + { + "epoch": 5.1713810316139766, + "loss": 0.35793358087539673, + "loss_ce": 2.3437820345861837e-05, + "loss_iou": 0.095703125, + "loss_num": 0.033203125, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 97385600, + "step": 1554 + }, + { + "epoch": 5.174708818635607, + "grad_norm": 19.962928771972656, + "learning_rate": 5e-06, + "loss": 0.6914, + "num_input_tokens_seen": 97447764, + "step": 1555 + }, + { + "epoch": 5.174708818635607, + "loss": 0.6495640277862549, + "loss_ce": 0.0005161368753761053, + "loss_iou": 0.208984375, + "loss_num": 0.046142578125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 97447764, + "step": 1555 + }, + { + "epoch": 5.178036605657238, + "grad_norm": 15.584821701049805, + "learning_rate": 5e-06, + "loss": 0.737, + "num_input_tokens_seen": 97510692, + "step": 1556 + }, + { + "epoch": 5.178036605657238, + "loss": 0.7587304711341858, + "loss_ce": 0.0005517329555004835, + "loss_iou": 0.28515625, + "loss_num": 0.037353515625, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 97510692, + "step": 1556 + }, + { + "epoch": 5.181364392678868, + "grad_norm": 16.34670066833496, + "learning_rate": 5e-06, + "loss": 1.0405, + "num_input_tokens_seen": 97573088, + "step": 1557 + }, + { + "epoch": 5.181364392678868, + "loss": 1.0706849098205566, + "loss_ce": 0.00012822201824747026, + "loss_iou": 0.33203125, + "loss_num": 0.08154296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 97573088, + "step": 1557 + }, + { + "epoch": 5.184692179700499, + "grad_norm": 24.74776840209961, + "learning_rate": 5e-06, + "loss": 0.7951, + "num_input_tokens_seen": 97636512, + "step": 1558 + }, + { + "epoch": 5.184692179700499, + "loss": 0.9686456918716431, + "loss_ce": 1.7786769603844732e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0517578125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 97636512, + "step": 1558 + }, + { + "epoch": 5.18801996672213, + "grad_norm": 8.181451797485352, + "learning_rate": 5e-06, + "loss": 0.7342, + "num_input_tokens_seen": 97699408, + "step": 1559 + }, + { + "epoch": 5.18801996672213, + "loss": 0.6913556456565857, + "loss_ce": 0.0006818252149969339, + "loss_iou": 0.296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 97699408, + "step": 1559 + }, + { + "epoch": 5.19134775374376, + "grad_norm": 9.973201751708984, + "learning_rate": 5e-06, + "loss": 0.7311, + "num_input_tokens_seen": 97761664, + "step": 1560 + }, + { + "epoch": 5.19134775374376, + "loss": 0.5677616596221924, + "loss_ce": 1.2609091754711699e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.046142578125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 97761664, + "step": 1560 + }, + { + "epoch": 5.194675540765391, + "grad_norm": 13.940975189208984, + "learning_rate": 5e-06, + "loss": 0.5023, + "num_input_tokens_seen": 97823884, + "step": 1561 + }, + { + "epoch": 5.194675540765391, + "loss": 0.5250530242919922, + "loss_ce": 2.8577667762874626e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.039306640625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 97823884, + "step": 1561 + }, + { + "epoch": 5.1980033277870215, + "grad_norm": 10.562603950500488, + "learning_rate": 5e-06, + "loss": 0.6535, + "num_input_tokens_seen": 97886020, + "step": 1562 + }, + { + "epoch": 5.1980033277870215, + "loss": 0.6427997350692749, + "loss_ce": 0.0007098691421560943, + "loss_iou": 0.23046875, + "loss_num": 0.036376953125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 97886020, + "step": 1562 + }, + { + "epoch": 5.201331114808652, + "grad_norm": 20.083114624023438, + "learning_rate": 5e-06, + "loss": 0.6058, + "num_input_tokens_seen": 97947632, + "step": 1563 + }, + { + "epoch": 5.201331114808652, + "loss": 0.564403772354126, + "loss_ce": 0.0003778576210606843, + "loss_iou": 0.1953125, + "loss_num": 0.034912109375, + "loss_xval": 0.5625, + "num_input_tokens_seen": 97947632, + "step": 1563 + }, + { + "epoch": 5.204658901830283, + "grad_norm": 34.775455474853516, + "learning_rate": 5e-06, + "loss": 0.8295, + "num_input_tokens_seen": 98010084, + "step": 1564 + }, + { + "epoch": 5.204658901830283, + "loss": 0.667966902256012, + "loss_ce": 5.918982424191199e-05, + "loss_iou": 0.25390625, + "loss_num": 0.032470703125, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 98010084, + "step": 1564 + }, + { + "epoch": 5.207986688851913, + "grad_norm": 14.078563690185547, + "learning_rate": 5e-06, + "loss": 0.5324, + "num_input_tokens_seen": 98072816, + "step": 1565 + }, + { + "epoch": 5.207986688851913, + "loss": 0.29302480816841125, + "loss_ce": 0.00048331127618439496, + "loss_iou": 0.0693359375, + "loss_num": 0.03076171875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 98072816, + "step": 1565 + }, + { + "epoch": 5.211314475873544, + "grad_norm": 12.865399360656738, + "learning_rate": 5e-06, + "loss": 0.6898, + "num_input_tokens_seen": 98135732, + "step": 1566 + }, + { + "epoch": 5.211314475873544, + "loss": 0.6527900695800781, + "loss_ce": 0.0006904720212332904, + "loss_iou": 0.1826171875, + "loss_num": 0.057373046875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 98135732, + "step": 1566 + }, + { + "epoch": 5.2146422628951745, + "grad_norm": 30.85588836669922, + "learning_rate": 5e-06, + "loss": 0.7293, + "num_input_tokens_seen": 98200008, + "step": 1567 + }, + { + "epoch": 5.2146422628951745, + "loss": 0.7736085057258606, + "loss_ce": 0.00041512242751196027, + "loss_iou": 0.30078125, + "loss_num": 0.0341796875, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 98200008, + "step": 1567 + }, + { + "epoch": 5.217970049916805, + "grad_norm": 29.402868270874023, + "learning_rate": 5e-06, + "loss": 0.8229, + "num_input_tokens_seen": 98264616, + "step": 1568 + }, + { + "epoch": 5.217970049916805, + "loss": 0.63280189037323, + "loss_ce": 0.00072178163100034, + "loss_iou": 0.2294921875, + "loss_num": 0.034423828125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 98264616, + "step": 1568 + }, + { + "epoch": 5.221297836938436, + "grad_norm": 20.66412925720215, + "learning_rate": 5e-06, + "loss": 0.6474, + "num_input_tokens_seen": 98327500, + "step": 1569 + }, + { + "epoch": 5.221297836938436, + "loss": 0.8841537833213806, + "loss_ce": 0.00024261744692921638, + "loss_iou": 0.322265625, + "loss_num": 0.048095703125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 98327500, + "step": 1569 + }, + { + "epoch": 5.224625623960066, + "grad_norm": 7.796050071716309, + "learning_rate": 5e-06, + "loss": 0.6683, + "num_input_tokens_seen": 98390560, + "step": 1570 + }, + { + "epoch": 5.224625623960066, + "loss": 0.6554022431373596, + "loss_ce": 6.712816229992313e-06, + "loss_iou": 0.26953125, + "loss_num": 0.0230712890625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 98390560, + "step": 1570 + }, + { + "epoch": 5.227953410981697, + "grad_norm": 22.678808212280273, + "learning_rate": 5e-06, + "loss": 0.743, + "num_input_tokens_seen": 98451556, + "step": 1571 + }, + { + "epoch": 5.227953410981697, + "loss": 0.8813857436180115, + "loss_ce": 3.806907989201136e-05, + "loss_iou": 0.318359375, + "loss_num": 0.048828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 98451556, + "step": 1571 + }, + { + "epoch": 5.231281198003328, + "grad_norm": 41.750709533691406, + "learning_rate": 5e-06, + "loss": 1.0732, + "num_input_tokens_seen": 98514720, + "step": 1572 + }, + { + "epoch": 5.231281198003328, + "loss": 1.1592261791229248, + "loss_ce": 0.0010230218758806586, + "loss_iou": 0.3515625, + "loss_num": 0.0908203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 98514720, + "step": 1572 + }, + { + "epoch": 5.234608985024958, + "grad_norm": 45.86041259765625, + "learning_rate": 5e-06, + "loss": 0.7024, + "num_input_tokens_seen": 98578124, + "step": 1573 + }, + { + "epoch": 5.234608985024958, + "loss": 0.7642949819564819, + "loss_ce": 0.0006078842561691999, + "loss_iou": 0.267578125, + "loss_num": 0.0458984375, + "loss_xval": 0.765625, + "num_input_tokens_seen": 98578124, + "step": 1573 + }, + { + "epoch": 5.237936772046589, + "grad_norm": 13.958196640014648, + "learning_rate": 5e-06, + "loss": 0.8009, + "num_input_tokens_seen": 98642292, + "step": 1574 + }, + { + "epoch": 5.237936772046589, + "loss": 0.6747276782989502, + "loss_ce": 0.0016319530550390482, + "loss_iou": 0.1943359375, + "loss_num": 0.056884765625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 98642292, + "step": 1574 + }, + { + "epoch": 5.241264559068219, + "grad_norm": 1243.618896484375, + "learning_rate": 5e-06, + "loss": 0.774, + "num_input_tokens_seen": 98704420, + "step": 1575 + }, + { + "epoch": 5.241264559068219, + "loss": 0.8179287314414978, + "loss_ce": 0.0005458915256895125, + "loss_iou": 0.279296875, + "loss_num": 0.0517578125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 98704420, + "step": 1575 + }, + { + "epoch": 5.24459234608985, + "grad_norm": 10.360309600830078, + "learning_rate": 5e-06, + "loss": 0.7435, + "num_input_tokens_seen": 98769476, + "step": 1576 + }, + { + "epoch": 5.24459234608985, + "loss": 0.7698988914489746, + "loss_ce": 0.0002455537032801658, + "loss_iou": 0.302734375, + "loss_num": 0.03271484375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 98769476, + "step": 1576 + }, + { + "epoch": 5.247920133111481, + "grad_norm": 7.8587565422058105, + "learning_rate": 5e-06, + "loss": 0.4282, + "num_input_tokens_seen": 98830068, + "step": 1577 + }, + { + "epoch": 5.247920133111481, + "loss": 0.5417072772979736, + "loss_ce": 2.0255403796909377e-05, + "loss_iou": 0.18359375, + "loss_num": 0.03515625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 98830068, + "step": 1577 + }, + { + "epoch": 5.251247920133111, + "grad_norm": 8.830144882202148, + "learning_rate": 5e-06, + "loss": 0.7409, + "num_input_tokens_seen": 98892736, + "step": 1578 + }, + { + "epoch": 5.251247920133111, + "loss": 0.6595664620399475, + "loss_ce": 2.0516697986749932e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.03466796875, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 98892736, + "step": 1578 + }, + { + "epoch": 5.254575707154742, + "grad_norm": 15.969278335571289, + "learning_rate": 5e-06, + "loss": 0.6557, + "num_input_tokens_seen": 98956260, + "step": 1579 + }, + { + "epoch": 5.254575707154742, + "loss": 0.4240087568759918, + "loss_ce": 0.0003026947088073939, + "loss_iou": 0.1484375, + "loss_num": 0.0252685546875, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 98956260, + "step": 1579 + }, + { + "epoch": 5.2579034941763725, + "grad_norm": 12.759567260742188, + "learning_rate": 5e-06, + "loss": 0.5799, + "num_input_tokens_seen": 99017636, + "step": 1580 + }, + { + "epoch": 5.2579034941763725, + "loss": 0.4847814440727234, + "loss_ce": 0.001138861756771803, + "loss_iou": 0.1572265625, + "loss_num": 0.03369140625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 99017636, + "step": 1580 + }, + { + "epoch": 5.261231281198003, + "grad_norm": 7.065427780151367, + "learning_rate": 5e-06, + "loss": 0.6962, + "num_input_tokens_seen": 99081064, + "step": 1581 + }, + { + "epoch": 5.261231281198003, + "loss": 0.7337771654129028, + "loss_ce": 0.0007449376862496138, + "loss_iou": 0.25, + "loss_num": 0.046142578125, + "loss_xval": 0.734375, + "num_input_tokens_seen": 99081064, + "step": 1581 + }, + { + "epoch": 5.264559068219634, + "grad_norm": 12.841547012329102, + "learning_rate": 5e-06, + "loss": 0.7063, + "num_input_tokens_seen": 99143616, + "step": 1582 + }, + { + "epoch": 5.264559068219634, + "loss": 0.6888269186019897, + "loss_ce": 0.00022827113571111113, + "loss_iou": 0.271484375, + "loss_num": 0.0286865234375, + "loss_xval": 0.6875, + "num_input_tokens_seen": 99143616, + "step": 1582 + }, + { + "epoch": 5.267886855241264, + "grad_norm": 21.898542404174805, + "learning_rate": 5e-06, + "loss": 0.7441, + "num_input_tokens_seen": 99206620, + "step": 1583 + }, + { + "epoch": 5.267886855241264, + "loss": 1.0425233840942383, + "loss_ce": 0.00040918824379332364, + "loss_iou": 0.408203125, + "loss_num": 0.044921875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 99206620, + "step": 1583 + }, + { + "epoch": 5.271214642262895, + "grad_norm": 18.596426010131836, + "learning_rate": 5e-06, + "loss": 0.7307, + "num_input_tokens_seen": 99270064, + "step": 1584 + }, + { + "epoch": 5.271214642262895, + "loss": 0.7801717519760132, + "loss_ce": 0.00038661700091324747, + "loss_iou": 0.314453125, + "loss_num": 0.030517578125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 99270064, + "step": 1584 + }, + { + "epoch": 5.2745424292845255, + "grad_norm": 23.57819366455078, + "learning_rate": 5e-06, + "loss": 0.8169, + "num_input_tokens_seen": 99333904, + "step": 1585 + }, + { + "epoch": 5.2745424292845255, + "loss": 0.5337511897087097, + "loss_ce": 0.0007311389781534672, + "loss_iou": 0.1923828125, + "loss_num": 0.029541015625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 99333904, + "step": 1585 + }, + { + "epoch": 5.277870216306156, + "grad_norm": 37.10131072998047, + "learning_rate": 5e-06, + "loss": 0.7919, + "num_input_tokens_seen": 99398088, + "step": 1586 + }, + { + "epoch": 5.277870216306156, + "loss": 0.803447961807251, + "loss_ce": 0.0004694595991168171, + "loss_iou": 0.328125, + "loss_num": 0.02978515625, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 99398088, + "step": 1586 + }, + { + "epoch": 5.281198003327787, + "grad_norm": 19.156675338745117, + "learning_rate": 5e-06, + "loss": 0.6396, + "num_input_tokens_seen": 99459368, + "step": 1587 + }, + { + "epoch": 5.281198003327787, + "loss": 0.9905081391334534, + "loss_ce": 2.9618571716127917e-05, + "loss_iou": 0.328125, + "loss_num": 0.06640625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 99459368, + "step": 1587 + }, + { + "epoch": 5.284525790349417, + "grad_norm": 8.31318473815918, + "learning_rate": 5e-06, + "loss": 0.4815, + "num_input_tokens_seen": 99520824, + "step": 1588 + }, + { + "epoch": 5.284525790349417, + "loss": 0.33191707730293274, + "loss_ce": 0.0004961899248883128, + "loss_iou": 0.09375, + "loss_num": 0.0286865234375, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 99520824, + "step": 1588 + }, + { + "epoch": 5.287853577371048, + "grad_norm": 9.956439971923828, + "learning_rate": 5e-06, + "loss": 0.6581, + "num_input_tokens_seen": 99581932, + "step": 1589 + }, + { + "epoch": 5.287853577371048, + "loss": 0.7131649255752563, + "loss_ce": 3.017848575836979e-05, + "loss_iou": 0.26953125, + "loss_num": 0.034912109375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 99581932, + "step": 1589 + }, + { + "epoch": 5.291181364392679, + "grad_norm": 7.714879512786865, + "learning_rate": 5e-06, + "loss": 0.7462, + "num_input_tokens_seen": 99644652, + "step": 1590 + }, + { + "epoch": 5.291181364392679, + "loss": 0.7490200996398926, + "loss_ce": 0.0003017770650330931, + "loss_iou": 0.259765625, + "loss_num": 0.0458984375, + "loss_xval": 0.75, + "num_input_tokens_seen": 99644652, + "step": 1590 + }, + { + "epoch": 5.294509151414309, + "grad_norm": 28.146652221679688, + "learning_rate": 5e-06, + "loss": 0.6564, + "num_input_tokens_seen": 99708500, + "step": 1591 + }, + { + "epoch": 5.294509151414309, + "loss": 0.8539837598800659, + "loss_ce": 0.0013836309080943465, + "loss_iou": 0.3515625, + "loss_num": 0.030029296875, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 99708500, + "step": 1591 + }, + { + "epoch": 5.29783693843594, + "grad_norm": 19.461881637573242, + "learning_rate": 5e-06, + "loss": 0.6438, + "num_input_tokens_seen": 99771208, + "step": 1592 + }, + { + "epoch": 5.29783693843594, + "loss": 0.6133031845092773, + "loss_ce": 2.1985513740219176e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.039306640625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 99771208, + "step": 1592 + }, + { + "epoch": 5.3011647254575704, + "grad_norm": 26.864931106567383, + "learning_rate": 5e-06, + "loss": 0.5504, + "num_input_tokens_seen": 99833760, + "step": 1593 + }, + { + "epoch": 5.3011647254575704, + "loss": 0.5138239860534668, + "loss_ce": 0.0006403637235052884, + "loss_iou": 0.1865234375, + "loss_num": 0.028076171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 99833760, + "step": 1593 + }, + { + "epoch": 5.304492512479201, + "grad_norm": 8.363646507263184, + "learning_rate": 5e-06, + "loss": 0.7576, + "num_input_tokens_seen": 99897804, + "step": 1594 + }, + { + "epoch": 5.304492512479201, + "loss": 0.7315871715545654, + "loss_ce": 0.0003860450815409422, + "loss_iou": 0.2041015625, + "loss_num": 0.06494140625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 99897804, + "step": 1594 + }, + { + "epoch": 5.307820299500832, + "grad_norm": 69.57489013671875, + "learning_rate": 5e-06, + "loss": 0.5305, + "num_input_tokens_seen": 99960196, + "step": 1595 + }, + { + "epoch": 5.307820299500832, + "loss": 0.6942192912101746, + "loss_ce": 0.00018853397341445088, + "loss_iou": 0.28515625, + "loss_num": 0.0245361328125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 99960196, + "step": 1595 + }, + { + "epoch": 5.311148086522462, + "grad_norm": 11.756884574890137, + "learning_rate": 5e-06, + "loss": 0.6475, + "num_input_tokens_seen": 100022584, + "step": 1596 + }, + { + "epoch": 5.311148086522462, + "loss": 0.7136390805244446, + "loss_ce": 1.6043655705288984e-05, + "loss_iou": 0.2099609375, + "loss_num": 0.058837890625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 100022584, + "step": 1596 + }, + { + "epoch": 5.314475873544093, + "grad_norm": 8.837007522583008, + "learning_rate": 5e-06, + "loss": 0.6514, + "num_input_tokens_seen": 100086124, + "step": 1597 + }, + { + "epoch": 5.314475873544093, + "loss": 0.6199404001235962, + "loss_ce": 0.0003114750434178859, + "loss_iou": 0.2216796875, + "loss_num": 0.035400390625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 100086124, + "step": 1597 + }, + { + "epoch": 5.3178036605657235, + "grad_norm": 8.616264343261719, + "learning_rate": 5e-06, + "loss": 0.7488, + "num_input_tokens_seen": 100148952, + "step": 1598 + }, + { + "epoch": 5.3178036605657235, + "loss": 0.45825719833374023, + "loss_ce": 5.2268542276578955e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.02783203125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 100148952, + "step": 1598 + }, + { + "epoch": 5.321131447587354, + "grad_norm": 12.141873359680176, + "learning_rate": 5e-06, + "loss": 0.5747, + "num_input_tokens_seen": 100210792, + "step": 1599 + }, + { + "epoch": 5.321131447587354, + "loss": 0.4171640872955322, + "loss_ce": 0.0004160656244494021, + "loss_iou": 0.1513671875, + "loss_num": 0.02294921875, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 100210792, + "step": 1599 + }, + { + "epoch": 5.324459234608985, + "grad_norm": 17.452781677246094, + "learning_rate": 5e-06, + "loss": 0.5113, + "num_input_tokens_seen": 100272944, + "step": 1600 + }, + { + "epoch": 5.324459234608985, + "loss": 0.5932729840278625, + "loss_ce": 0.0009878204436972737, + "loss_iou": 0.22265625, + "loss_num": 0.0294189453125, + "loss_xval": 0.59375, + "num_input_tokens_seen": 100272944, + "step": 1600 + }, + { + "epoch": 5.327787021630615, + "grad_norm": 13.12947940826416, + "learning_rate": 5e-06, + "loss": 0.4211, + "num_input_tokens_seen": 100336000, + "step": 1601 + }, + { + "epoch": 5.327787021630615, + "loss": 0.34248068928718567, + "loss_ce": 0.00013448798563331366, + "loss_iou": 0.12255859375, + "loss_num": 0.0194091796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 100336000, + "step": 1601 + }, + { + "epoch": 5.331114808652246, + "grad_norm": 16.778797149658203, + "learning_rate": 5e-06, + "loss": 0.9173, + "num_input_tokens_seen": 100398688, + "step": 1602 + }, + { + "epoch": 5.331114808652246, + "loss": 0.812654972076416, + "loss_ce": 0.00015499009168706834, + "loss_iou": 0.3203125, + "loss_num": 0.034912109375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 100398688, + "step": 1602 + }, + { + "epoch": 5.334442595673877, + "grad_norm": 41.81510543823242, + "learning_rate": 5e-06, + "loss": 0.7542, + "num_input_tokens_seen": 100461044, + "step": 1603 + }, + { + "epoch": 5.334442595673877, + "loss": 0.9035730361938477, + "loss_ce": 8.593749953433871e-06, + "loss_iou": 0.279296875, + "loss_num": 0.06884765625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 100461044, + "step": 1603 + }, + { + "epoch": 5.337770382695507, + "grad_norm": 27.792001724243164, + "learning_rate": 5e-06, + "loss": 0.6624, + "num_input_tokens_seen": 100524272, + "step": 1604 + }, + { + "epoch": 5.337770382695507, + "loss": 0.4387521743774414, + "loss_ce": 3.14875396725256e-05, + "loss_iou": 0.158203125, + "loss_num": 0.0244140625, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 100524272, + "step": 1604 + }, + { + "epoch": 5.341098169717138, + "grad_norm": 10.232015609741211, + "learning_rate": 5e-06, + "loss": 0.6419, + "num_input_tokens_seen": 100586896, + "step": 1605 + }, + { + "epoch": 5.341098169717138, + "loss": 0.7033101320266724, + "loss_ce": 0.00018513888062443584, + "loss_iou": 0.26171875, + "loss_num": 0.035888671875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 100586896, + "step": 1605 + }, + { + "epoch": 5.344425956738768, + "grad_norm": 9.116806983947754, + "learning_rate": 5e-06, + "loss": 0.5147, + "num_input_tokens_seen": 100649132, + "step": 1606 + }, + { + "epoch": 5.344425956738768, + "loss": 0.42553964257240295, + "loss_ce": 2.5271147023886442e-06, + "loss_iou": 0.146484375, + "loss_num": 0.0264892578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 100649132, + "step": 1606 + }, + { + "epoch": 5.347753743760399, + "grad_norm": 24.01177978515625, + "learning_rate": 5e-06, + "loss": 0.7176, + "num_input_tokens_seen": 100712640, + "step": 1607 + }, + { + "epoch": 5.347753743760399, + "loss": 0.8379149436950684, + "loss_ce": 0.0006346513982862234, + "loss_iou": 0.302734375, + "loss_num": 0.046630859375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 100712640, + "step": 1607 + }, + { + "epoch": 5.35108153078203, + "grad_norm": 17.368818283081055, + "learning_rate": 5e-06, + "loss": 0.6702, + "num_input_tokens_seen": 100775344, + "step": 1608 + }, + { + "epoch": 5.35108153078203, + "loss": 0.8547390699386597, + "loss_ce": 2.7412108920543687e-06, + "loss_iou": 0.326171875, + "loss_num": 0.040771484375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 100775344, + "step": 1608 + }, + { + "epoch": 5.35440931780366, + "grad_norm": 19.727764129638672, + "learning_rate": 5e-06, + "loss": 0.7125, + "num_input_tokens_seen": 100839532, + "step": 1609 + }, + { + "epoch": 5.35440931780366, + "loss": 0.8060716390609741, + "loss_ce": 0.0013841248583048582, + "loss_iou": 0.287109375, + "loss_num": 0.04638671875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 100839532, + "step": 1609 + }, + { + "epoch": 5.357737104825291, + "grad_norm": 14.09434700012207, + "learning_rate": 5e-06, + "loss": 0.6749, + "num_input_tokens_seen": 100903348, + "step": 1610 + }, + { + "epoch": 5.357737104825291, + "loss": 0.7217329740524292, + "loss_ce": 0.0001753973338054493, + "loss_iou": 0.2578125, + "loss_num": 0.04150390625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 100903348, + "step": 1610 + }, + { + "epoch": 5.3610648918469215, + "grad_norm": 36.83820343017578, + "learning_rate": 5e-06, + "loss": 0.5817, + "num_input_tokens_seen": 100964912, + "step": 1611 + }, + { + "epoch": 5.3610648918469215, + "loss": 0.6560474634170532, + "loss_ce": 4.160653043072671e-05, + "loss_iou": 0.216796875, + "loss_num": 0.04443359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 100964912, + "step": 1611 + }, + { + "epoch": 5.364392678868552, + "grad_norm": 11.305191993713379, + "learning_rate": 5e-06, + "loss": 0.4619, + "num_input_tokens_seen": 101026424, + "step": 1612 + }, + { + "epoch": 5.364392678868552, + "loss": 0.6465030908584595, + "loss_ce": 1.8739890947472304e-05, + "loss_iou": 0.2373046875, + "loss_num": 0.03466796875, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 101026424, + "step": 1612 + }, + { + "epoch": 5.367720465890183, + "grad_norm": 11.862913131713867, + "learning_rate": 5e-06, + "loss": 0.8137, + "num_input_tokens_seen": 101090924, + "step": 1613 + }, + { + "epoch": 5.367720465890183, + "loss": 0.8782895803451538, + "loss_ce": 0.00023779345792718232, + "loss_iou": 0.32421875, + "loss_num": 0.046142578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 101090924, + "step": 1613 + }, + { + "epoch": 5.371048252911813, + "grad_norm": 18.821861267089844, + "learning_rate": 5e-06, + "loss": 0.3774, + "num_input_tokens_seen": 101153632, + "step": 1614 + }, + { + "epoch": 5.371048252911813, + "loss": 0.4713307321071625, + "loss_ce": 1.724835419736337e-05, + "loss_iou": 0.1875, + "loss_num": 0.019287109375, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 101153632, + "step": 1614 + }, + { + "epoch": 5.374376039933444, + "grad_norm": 18.81785774230957, + "learning_rate": 5e-06, + "loss": 0.4723, + "num_input_tokens_seen": 101216952, + "step": 1615 + }, + { + "epoch": 5.374376039933444, + "loss": 0.5333555936813354, + "loss_ce": 0.0013731429353356361, + "loss_iou": 0.169921875, + "loss_num": 0.0380859375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 101216952, + "step": 1615 + }, + { + "epoch": 5.3777038269550745, + "grad_norm": 8.743084907531738, + "learning_rate": 5e-06, + "loss": 0.5347, + "num_input_tokens_seen": 101278632, + "step": 1616 + }, + { + "epoch": 5.3777038269550745, + "loss": 0.5145453214645386, + "loss_ce": 0.00014100028784014285, + "loss_iou": 0.1455078125, + "loss_num": 0.044677734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 101278632, + "step": 1616 + }, + { + "epoch": 5.381031613976705, + "grad_norm": 15.498371124267578, + "learning_rate": 5e-06, + "loss": 0.8427, + "num_input_tokens_seen": 101341908, + "step": 1617 + }, + { + "epoch": 5.381031613976705, + "loss": 0.5787339210510254, + "loss_ce": 0.00012060911831213161, + "loss_iou": 0.2216796875, + "loss_num": 0.027099609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 101341908, + "step": 1617 + }, + { + "epoch": 5.384359400998336, + "grad_norm": 25.164121627807617, + "learning_rate": 5e-06, + "loss": 0.5582, + "num_input_tokens_seen": 101403536, + "step": 1618 + }, + { + "epoch": 5.384359400998336, + "loss": 0.4660763144493103, + "loss_ce": 1.1841300874948502e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.038818359375, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 101403536, + "step": 1618 + }, + { + "epoch": 5.387687188019966, + "grad_norm": 10.2332763671875, + "learning_rate": 5e-06, + "loss": 0.6704, + "num_input_tokens_seen": 101467088, + "step": 1619 + }, + { + "epoch": 5.387687188019966, + "loss": 0.490899920463562, + "loss_ce": 0.00042139904689975083, + "loss_iou": 0.1962890625, + "loss_num": 0.019775390625, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 101467088, + "step": 1619 + }, + { + "epoch": 5.391014975041597, + "grad_norm": 19.87059783935547, + "learning_rate": 5e-06, + "loss": 0.5354, + "num_input_tokens_seen": 101529148, + "step": 1620 + }, + { + "epoch": 5.391014975041597, + "loss": 0.5794316530227661, + "loss_ce": 0.0011235260171815753, + "loss_iou": 0.1826171875, + "loss_num": 0.042724609375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 101529148, + "step": 1620 + }, + { + "epoch": 5.394342762063228, + "grad_norm": 11.307812690734863, + "learning_rate": 5e-06, + "loss": 0.6216, + "num_input_tokens_seen": 101592284, + "step": 1621 + }, + { + "epoch": 5.394342762063228, + "loss": 0.6232434511184692, + "loss_ce": 0.0006848212797194719, + "loss_iou": 0.228515625, + "loss_num": 0.033203125, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 101592284, + "step": 1621 + }, + { + "epoch": 5.397670549084858, + "grad_norm": 22.18927001953125, + "learning_rate": 5e-06, + "loss": 0.6218, + "num_input_tokens_seen": 101654680, + "step": 1622 + }, + { + "epoch": 5.397670549084858, + "loss": 0.717785120010376, + "loss_ce": 0.0004999724333174527, + "loss_iou": 0.234375, + "loss_num": 0.0498046875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 101654680, + "step": 1622 + }, + { + "epoch": 5.400998336106489, + "grad_norm": 14.127787590026855, + "learning_rate": 5e-06, + "loss": 0.7036, + "num_input_tokens_seen": 101717188, + "step": 1623 + }, + { + "epoch": 5.400998336106489, + "loss": 0.8181177377700806, + "loss_ce": 0.00036871584597975016, + "loss_iou": 0.28125, + "loss_num": 0.05078125, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 101717188, + "step": 1623 + }, + { + "epoch": 5.404326123128119, + "grad_norm": 15.851911544799805, + "learning_rate": 5e-06, + "loss": 0.7727, + "num_input_tokens_seen": 101780468, + "step": 1624 + }, + { + "epoch": 5.404326123128119, + "loss": 0.8187301158905029, + "loss_ce": 0.00018765513959806412, + "loss_iou": 0.34375, + "loss_num": 0.0263671875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 101780468, + "step": 1624 + }, + { + "epoch": 5.40765391014975, + "grad_norm": 7.549576759338379, + "learning_rate": 5e-06, + "loss": 0.4095, + "num_input_tokens_seen": 101842308, + "step": 1625 + }, + { + "epoch": 5.40765391014975, + "loss": 0.38657066226005554, + "loss_ce": 0.0008284934447146952, + "loss_iou": 0.08984375, + "loss_num": 0.041015625, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 101842308, + "step": 1625 + }, + { + "epoch": 5.410981697171381, + "grad_norm": 9.882723808288574, + "learning_rate": 5e-06, + "loss": 0.6379, + "num_input_tokens_seen": 101903744, + "step": 1626 + }, + { + "epoch": 5.410981697171381, + "loss": 0.6620615720748901, + "loss_ce": 0.0009287626016885042, + "loss_iou": 0.25, + "loss_num": 0.0322265625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 101903744, + "step": 1626 + }, + { + "epoch": 5.414309484193011, + "grad_norm": 14.373669624328613, + "learning_rate": 5e-06, + "loss": 0.8154, + "num_input_tokens_seen": 101967284, + "step": 1627 + }, + { + "epoch": 5.414309484193011, + "loss": 0.6620774865150452, + "loss_ce": 0.0007005495717749, + "loss_iou": 0.2412109375, + "loss_num": 0.03564453125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 101967284, + "step": 1627 + }, + { + "epoch": 5.417637271214642, + "grad_norm": 18.5496768951416, + "learning_rate": 5e-06, + "loss": 0.6312, + "num_input_tokens_seen": 102029524, + "step": 1628 + }, + { + "epoch": 5.417637271214642, + "loss": 0.7796862721443176, + "loss_ce": 0.0016101216897368431, + "loss_iou": 0.279296875, + "loss_num": 0.043701171875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 102029524, + "step": 1628 + }, + { + "epoch": 5.4209650582362725, + "grad_norm": 14.205424308776855, + "learning_rate": 5e-06, + "loss": 0.5817, + "num_input_tokens_seen": 102093696, + "step": 1629 + }, + { + "epoch": 5.4209650582362725, + "loss": 0.5077414512634277, + "loss_ce": 0.00017305587243754417, + "loss_iou": 0.1845703125, + "loss_num": 0.027587890625, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 102093696, + "step": 1629 + }, + { + "epoch": 5.424292845257903, + "grad_norm": 9.155461311340332, + "learning_rate": 5e-06, + "loss": 0.5884, + "num_input_tokens_seen": 102155176, + "step": 1630 + }, + { + "epoch": 5.424292845257903, + "loss": 0.5432171821594238, + "loss_ce": 0.0003704904520418495, + "loss_iou": 0.1826171875, + "loss_num": 0.035400390625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 102155176, + "step": 1630 + }, + { + "epoch": 5.427620632279534, + "grad_norm": 8.620030403137207, + "learning_rate": 5e-06, + "loss": 0.6747, + "num_input_tokens_seen": 102217376, + "step": 1631 + }, + { + "epoch": 5.427620632279534, + "loss": 0.6632170081138611, + "loss_ce": 9.006463187688496e-06, + "loss_iou": 0.212890625, + "loss_num": 0.047119140625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 102217376, + "step": 1631 + }, + { + "epoch": 5.430948419301164, + "grad_norm": 10.027673721313477, + "learning_rate": 5e-06, + "loss": 0.5378, + "num_input_tokens_seen": 102281180, + "step": 1632 + }, + { + "epoch": 5.430948419301164, + "loss": 0.47565510869026184, + "loss_ce": 0.00019121626974083483, + "loss_iou": 0.1591796875, + "loss_num": 0.031494140625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 102281180, + "step": 1632 + }, + { + "epoch": 5.434276206322795, + "grad_norm": 52.82054901123047, + "learning_rate": 5e-06, + "loss": 0.8202, + "num_input_tokens_seen": 102343984, + "step": 1633 + }, + { + "epoch": 5.434276206322795, + "loss": 0.6174899935722351, + "loss_ce": 0.0004245634190738201, + "loss_iou": 0.1689453125, + "loss_num": 0.055908203125, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 102343984, + "step": 1633 + }, + { + "epoch": 5.437603993344426, + "grad_norm": 13.176911354064941, + "learning_rate": 5e-06, + "loss": 0.9236, + "num_input_tokens_seen": 102407440, + "step": 1634 + }, + { + "epoch": 5.437603993344426, + "loss": 0.7702251076698303, + "loss_ce": 0.0005717647145502269, + "loss_iou": 0.2353515625, + "loss_num": 0.06005859375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 102407440, + "step": 1634 + }, + { + "epoch": 5.440931780366056, + "grad_norm": 10.518148422241211, + "learning_rate": 5e-06, + "loss": 0.7139, + "num_input_tokens_seen": 102470212, + "step": 1635 + }, + { + "epoch": 5.440931780366056, + "loss": 0.7326650619506836, + "loss_ce": 0.0007314276299439371, + "loss_iou": 0.22265625, + "loss_num": 0.056884765625, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 102470212, + "step": 1635 + }, + { + "epoch": 5.444259567387687, + "grad_norm": 15.578624725341797, + "learning_rate": 5e-06, + "loss": 0.8894, + "num_input_tokens_seen": 102534180, + "step": 1636 + }, + { + "epoch": 5.444259567387687, + "loss": 1.045713186264038, + "loss_ce": 0.0006692318129353225, + "loss_iou": 0.33984375, + "loss_num": 0.07275390625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 102534180, + "step": 1636 + }, + { + "epoch": 5.447587354409317, + "grad_norm": 11.064337730407715, + "learning_rate": 5e-06, + "loss": 0.6611, + "num_input_tokens_seen": 102596244, + "step": 1637 + }, + { + "epoch": 5.447587354409317, + "loss": 0.8054574728012085, + "loss_ce": 0.00015959216398186982, + "loss_iou": 0.2578125, + "loss_num": 0.05810546875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 102596244, + "step": 1637 + }, + { + "epoch": 5.450915141430948, + "grad_norm": 33.97342300415039, + "learning_rate": 5e-06, + "loss": 0.6283, + "num_input_tokens_seen": 102658532, + "step": 1638 + }, + { + "epoch": 5.450915141430948, + "loss": 0.6672518849372864, + "loss_ce": 0.0009921238524839282, + "loss_iou": 0.263671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 102658532, + "step": 1638 + }, + { + "epoch": 5.454242928452579, + "grad_norm": 8.071794509887695, + "learning_rate": 5e-06, + "loss": 0.6823, + "num_input_tokens_seen": 102720076, + "step": 1639 + }, + { + "epoch": 5.454242928452579, + "loss": 0.8720945715904236, + "loss_ce": 2.4301192752318457e-05, + "loss_iou": 0.298828125, + "loss_num": 0.054443359375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 102720076, + "step": 1639 + }, + { + "epoch": 5.457570715474209, + "grad_norm": 9.842574119567871, + "learning_rate": 5e-06, + "loss": 0.7563, + "num_input_tokens_seen": 102782416, + "step": 1640 + }, + { + "epoch": 5.457570715474209, + "loss": 0.6718653440475464, + "loss_ce": 0.0009668688289821148, + "loss_iou": 0.232421875, + "loss_num": 0.041259765625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 102782416, + "step": 1640 + }, + { + "epoch": 5.46089850249584, + "grad_norm": 16.268909454345703, + "learning_rate": 5e-06, + "loss": 0.7966, + "num_input_tokens_seen": 102845652, + "step": 1641 + }, + { + "epoch": 5.46089850249584, + "loss": 0.8888282775878906, + "loss_ce": 0.0006446776678785682, + "loss_iou": 0.341796875, + "loss_num": 0.04150390625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 102845652, + "step": 1641 + }, + { + "epoch": 5.4642262895174705, + "grad_norm": 14.76564884185791, + "learning_rate": 5e-06, + "loss": 0.7013, + "num_input_tokens_seen": 102906500, + "step": 1642 + }, + { + "epoch": 5.4642262895174705, + "loss": 0.7110059261322021, + "loss_ce": 0.0001294732792302966, + "loss_iou": 0.1591796875, + "loss_num": 0.07861328125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 102906500, + "step": 1642 + }, + { + "epoch": 5.467554076539101, + "grad_norm": 19.5927791595459, + "learning_rate": 5e-06, + "loss": 0.5605, + "num_input_tokens_seen": 102969944, + "step": 1643 + }, + { + "epoch": 5.467554076539101, + "loss": 0.7769869565963745, + "loss_ce": 9.381456948176492e-06, + "loss_iou": 0.296875, + "loss_num": 0.036865234375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 102969944, + "step": 1643 + }, + { + "epoch": 5.470881863560733, + "grad_norm": 16.91885757446289, + "learning_rate": 5e-06, + "loss": 0.7235, + "num_input_tokens_seen": 103033352, + "step": 1644 + }, + { + "epoch": 5.470881863560733, + "loss": 0.531306803226471, + "loss_ce": 0.0009113232372328639, + "loss_iou": 0.1767578125, + "loss_num": 0.03564453125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 103033352, + "step": 1644 + }, + { + "epoch": 5.474209650582363, + "grad_norm": 16.343990325927734, + "learning_rate": 5e-06, + "loss": 0.7818, + "num_input_tokens_seen": 103097296, + "step": 1645 + }, + { + "epoch": 5.474209650582363, + "loss": 1.0094261169433594, + "loss_ce": 0.0006371331983245909, + "loss_iou": 0.357421875, + "loss_num": 0.058837890625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 103097296, + "step": 1645 + }, + { + "epoch": 5.477537437603994, + "grad_norm": 7.790471076965332, + "learning_rate": 5e-06, + "loss": 0.5469, + "num_input_tokens_seen": 103159720, + "step": 1646 + }, + { + "epoch": 5.477537437603994, + "loss": 0.3706333041191101, + "loss_ce": 2.7853264327859506e-05, + "loss_iou": 0.1318359375, + "loss_num": 0.021484375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 103159720, + "step": 1646 + }, + { + "epoch": 5.480865224625624, + "grad_norm": 37.15055847167969, + "learning_rate": 5e-06, + "loss": 0.7048, + "num_input_tokens_seen": 103222660, + "step": 1647 + }, + { + "epoch": 5.480865224625624, + "loss": 0.6658989787101746, + "loss_ce": 0.0004937172634527087, + "loss_iou": 0.24609375, + "loss_num": 0.03466796875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 103222660, + "step": 1647 + }, + { + "epoch": 5.484193011647255, + "grad_norm": 27.953083038330078, + "learning_rate": 5e-06, + "loss": 0.8436, + "num_input_tokens_seen": 103286316, + "step": 1648 + }, + { + "epoch": 5.484193011647255, + "loss": 0.8316751718521118, + "loss_ce": 1.0130894224857911e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 103286316, + "step": 1648 + }, + { + "epoch": 5.487520798668886, + "grad_norm": 20.30833625793457, + "learning_rate": 5e-06, + "loss": 0.6182, + "num_input_tokens_seen": 103349840, + "step": 1649 + }, + { + "epoch": 5.487520798668886, + "loss": 0.5982181429862976, + "loss_ce": 1.259712917089928e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.025390625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 103349840, + "step": 1649 + }, + { + "epoch": 5.490848585690516, + "grad_norm": 20.08171272277832, + "learning_rate": 5e-06, + "loss": 0.5541, + "num_input_tokens_seen": 103413016, + "step": 1650 + }, + { + "epoch": 5.490848585690516, + "loss": 0.7060139775276184, + "loss_ce": 0.0004475625464692712, + "loss_iou": 0.287109375, + "loss_num": 0.02587890625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 103413016, + "step": 1650 + }, + { + "epoch": 5.494176372712147, + "grad_norm": 8.847830772399902, + "learning_rate": 5e-06, + "loss": 0.7636, + "num_input_tokens_seen": 103477308, + "step": 1651 + }, + { + "epoch": 5.494176372712147, + "loss": 0.9405819773674011, + "loss_ce": 0.0003964377101510763, + "loss_iou": 0.3671875, + "loss_num": 0.041259765625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 103477308, + "step": 1651 + }, + { + "epoch": 5.4975041597337775, + "grad_norm": 13.779448509216309, + "learning_rate": 5e-06, + "loss": 0.835, + "num_input_tokens_seen": 103539928, + "step": 1652 + }, + { + "epoch": 5.4975041597337775, + "loss": 1.0582094192504883, + "loss_ce": 0.0008363999659195542, + "loss_iou": 0.326171875, + "loss_num": 0.0810546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 103539928, + "step": 1652 + }, + { + "epoch": 5.500831946755408, + "grad_norm": 21.17045021057129, + "learning_rate": 5e-06, + "loss": 0.6832, + "num_input_tokens_seen": 103602172, + "step": 1653 + }, + { + "epoch": 5.500831946755408, + "loss": 0.42666590213775635, + "loss_ce": 0.00039636611472815275, + "loss_iou": 0.1689453125, + "loss_num": 0.0174560546875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 103602172, + "step": 1653 + }, + { + "epoch": 5.504159733777039, + "grad_norm": 20.944114685058594, + "learning_rate": 5e-06, + "loss": 0.6127, + "num_input_tokens_seen": 103663200, + "step": 1654 + }, + { + "epoch": 5.504159733777039, + "loss": 0.7828108668327332, + "loss_ce": 9.606579260434955e-05, + "loss_iou": 0.263671875, + "loss_num": 0.051025390625, + "loss_xval": 0.78125, + "num_input_tokens_seen": 103663200, + "step": 1654 + }, + { + "epoch": 5.507487520798669, + "grad_norm": 8.944890022277832, + "learning_rate": 5e-06, + "loss": 0.6528, + "num_input_tokens_seen": 103726100, + "step": 1655 + }, + { + "epoch": 5.507487520798669, + "loss": 0.6580873727798462, + "loss_ce": 0.00012836034875363111, + "loss_iou": 0.25390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 103726100, + "step": 1655 + }, + { + "epoch": 5.5108153078203, + "grad_norm": 11.251640319824219, + "learning_rate": 5e-06, + "loss": 0.7174, + "num_input_tokens_seen": 103788604, + "step": 1656 + }, + { + "epoch": 5.5108153078203, + "loss": 0.6210998296737671, + "loss_ce": 6.068248694646172e-06, + "loss_iou": 0.2060546875, + "loss_num": 0.041748046875, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 103788604, + "step": 1656 + }, + { + "epoch": 5.5141430948419305, + "grad_norm": 18.31918716430664, + "learning_rate": 5e-06, + "loss": 0.6866, + "num_input_tokens_seen": 103851268, + "step": 1657 + }, + { + "epoch": 5.5141430948419305, + "loss": 0.780825138092041, + "loss_ce": 0.0007958478527143598, + "loss_iou": 0.236328125, + "loss_num": 0.0615234375, + "loss_xval": 0.78125, + "num_input_tokens_seen": 103851268, + "step": 1657 + }, + { + "epoch": 5.517470881863561, + "grad_norm": 17.294458389282227, + "learning_rate": 5e-06, + "loss": 0.4707, + "num_input_tokens_seen": 103912924, + "step": 1658 + }, + { + "epoch": 5.517470881863561, + "loss": 0.45224249362945557, + "loss_ce": 0.0002466700680088252, + "loss_iou": 0.1630859375, + "loss_num": 0.025390625, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 103912924, + "step": 1658 + }, + { + "epoch": 5.520798668885192, + "grad_norm": 8.51252269744873, + "learning_rate": 5e-06, + "loss": 0.6149, + "num_input_tokens_seen": 103975188, + "step": 1659 + }, + { + "epoch": 5.520798668885192, + "loss": 0.6649562120437622, + "loss_ce": 0.00040545634692534804, + "loss_iou": 0.2138671875, + "loss_num": 0.047607421875, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 103975188, + "step": 1659 + }, + { + "epoch": 5.524126455906822, + "grad_norm": 15.996707916259766, + "learning_rate": 5e-06, + "loss": 0.7851, + "num_input_tokens_seen": 104037616, + "step": 1660 + }, + { + "epoch": 5.524126455906822, + "loss": 0.8272863626480103, + "loss_ce": 1.5841425920370966e-05, + "loss_iou": 0.27734375, + "loss_num": 0.054443359375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 104037616, + "step": 1660 + }, + { + "epoch": 5.527454242928453, + "grad_norm": 12.95364761352539, + "learning_rate": 5e-06, + "loss": 0.5927, + "num_input_tokens_seen": 104099448, + "step": 1661 + }, + { + "epoch": 5.527454242928453, + "loss": 0.4973302185535431, + "loss_ce": 0.0005956004024483263, + "loss_iou": 0.1328125, + "loss_num": 0.04638671875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 104099448, + "step": 1661 + }, + { + "epoch": 5.530782029950084, + "grad_norm": 11.21883773803711, + "learning_rate": 5e-06, + "loss": 0.6192, + "num_input_tokens_seen": 104161152, + "step": 1662 + }, + { + "epoch": 5.530782029950084, + "loss": 0.8068508505821228, + "loss_ce": 0.0006985386717133224, + "loss_iou": 0.328125, + "loss_num": 0.0302734375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 104161152, + "step": 1662 + }, + { + "epoch": 5.534109816971714, + "grad_norm": 13.665806770324707, + "learning_rate": 5e-06, + "loss": 0.7915, + "num_input_tokens_seen": 104224144, + "step": 1663 + }, + { + "epoch": 5.534109816971714, + "loss": 0.9511141180992126, + "loss_ce": 0.00043049981468357146, + "loss_iou": 0.365234375, + "loss_num": 0.044189453125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 104224144, + "step": 1663 + }, + { + "epoch": 5.537437603993345, + "grad_norm": 8.892619132995605, + "learning_rate": 5e-06, + "loss": 0.8575, + "num_input_tokens_seen": 104287548, + "step": 1664 + }, + { + "epoch": 5.537437603993345, + "loss": 0.8013952970504761, + "loss_ce": 0.001346514793112874, + "loss_iou": 0.318359375, + "loss_num": 0.03271484375, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 104287548, + "step": 1664 + }, + { + "epoch": 5.5407653910149754, + "grad_norm": 8.533040046691895, + "learning_rate": 5e-06, + "loss": 0.6726, + "num_input_tokens_seen": 104348436, + "step": 1665 + }, + { + "epoch": 5.5407653910149754, + "loss": 0.4962202310562134, + "loss_ce": 4.4194543988851365e-06, + "loss_iou": 0.1328125, + "loss_num": 0.046142578125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 104348436, + "step": 1665 + }, + { + "epoch": 5.544093178036606, + "grad_norm": 15.838689804077148, + "learning_rate": 5e-06, + "loss": 0.7047, + "num_input_tokens_seen": 104411136, + "step": 1666 + }, + { + "epoch": 5.544093178036606, + "loss": 0.7474393844604492, + "loss_ce": 2.877732640627073e-06, + "loss_iou": 0.234375, + "loss_num": 0.055908203125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 104411136, + "step": 1666 + }, + { + "epoch": 5.547420965058237, + "grad_norm": 13.48023796081543, + "learning_rate": 5e-06, + "loss": 0.54, + "num_input_tokens_seen": 104473460, + "step": 1667 + }, + { + "epoch": 5.547420965058237, + "loss": 0.4172057807445526, + "loss_ce": 0.0005798295023851097, + "loss_iou": 0.1630859375, + "loss_num": 0.0181884765625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 104473460, + "step": 1667 + }, + { + "epoch": 5.550748752079867, + "grad_norm": 13.00550651550293, + "learning_rate": 5e-06, + "loss": 0.5776, + "num_input_tokens_seen": 104535100, + "step": 1668 + }, + { + "epoch": 5.550748752079867, + "loss": 0.5370232462882996, + "loss_ce": 0.00028008728986606, + "loss_iou": 0.1533203125, + "loss_num": 0.046142578125, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 104535100, + "step": 1668 + }, + { + "epoch": 5.554076539101498, + "grad_norm": 25.64752769470215, + "learning_rate": 5e-06, + "loss": 0.5463, + "num_input_tokens_seen": 104597888, + "step": 1669 + }, + { + "epoch": 5.554076539101498, + "loss": 0.4867459237575531, + "loss_ce": 0.0007840264588594437, + "loss_iou": 0.1708984375, + "loss_num": 0.02880859375, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 104597888, + "step": 1669 + }, + { + "epoch": 5.5574043261231285, + "grad_norm": 11.490925788879395, + "learning_rate": 5e-06, + "loss": 0.6501, + "num_input_tokens_seen": 104659476, + "step": 1670 + }, + { + "epoch": 5.5574043261231285, + "loss": 0.45913708209991455, + "loss_ce": 3.0645875085610896e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.049072265625, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 104659476, + "step": 1670 + }, + { + "epoch": 5.560732113144759, + "grad_norm": 11.503711700439453, + "learning_rate": 5e-06, + "loss": 0.7049, + "num_input_tokens_seen": 104723016, + "step": 1671 + }, + { + "epoch": 5.560732113144759, + "loss": 0.8446140289306641, + "loss_ce": 0.0008640355663374066, + "loss_iou": 0.314453125, + "loss_num": 0.04345703125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 104723016, + "step": 1671 + }, + { + "epoch": 5.56405990016639, + "grad_norm": 8.733305931091309, + "learning_rate": 5e-06, + "loss": 0.5664, + "num_input_tokens_seen": 104784856, + "step": 1672 + }, + { + "epoch": 5.56405990016639, + "loss": 0.4931206703186035, + "loss_ce": 0.0008110986091196537, + "loss_iou": 0.1845703125, + "loss_num": 0.024658203125, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 104784856, + "step": 1672 + }, + { + "epoch": 5.56738768718802, + "grad_norm": 9.786175727844238, + "learning_rate": 5e-06, + "loss": 0.846, + "num_input_tokens_seen": 104848380, + "step": 1673 + }, + { + "epoch": 5.56738768718802, + "loss": 0.7731995582580566, + "loss_ce": 6.221240255399607e-06, + "loss_iou": 0.2470703125, + "loss_num": 0.0556640625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 104848380, + "step": 1673 + }, + { + "epoch": 5.570715474209651, + "grad_norm": 13.724798202514648, + "learning_rate": 5e-06, + "loss": 0.6908, + "num_input_tokens_seen": 104910960, + "step": 1674 + }, + { + "epoch": 5.570715474209651, + "loss": 0.691491961479187, + "loss_ce": 8.570987120037898e-05, + "loss_iou": 0.25, + "loss_num": 0.037841796875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 104910960, + "step": 1674 + }, + { + "epoch": 5.574043261231282, + "grad_norm": 12.126524925231934, + "learning_rate": 5e-06, + "loss": 0.7477, + "num_input_tokens_seen": 104974332, + "step": 1675 + }, + { + "epoch": 5.574043261231282, + "loss": 0.45954495668411255, + "loss_ce": 7.229376933537424e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.022705078125, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 104974332, + "step": 1675 + }, + { + "epoch": 5.577371048252912, + "grad_norm": 21.586347579956055, + "learning_rate": 5e-06, + "loss": 0.6319, + "num_input_tokens_seen": 105036216, + "step": 1676 + }, + { + "epoch": 5.577371048252912, + "loss": 0.858528733253479, + "loss_ce": 6.919073348399252e-05, + "loss_iou": 0.3359375, + "loss_num": 0.037353515625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 105036216, + "step": 1676 + }, + { + "epoch": 5.580698835274543, + "grad_norm": 6.689985275268555, + "learning_rate": 5e-06, + "loss": 0.5895, + "num_input_tokens_seen": 105098888, + "step": 1677 + }, + { + "epoch": 5.580698835274543, + "loss": 0.5708335041999817, + "loss_ce": 0.0006430556531995535, + "loss_iou": 0.158203125, + "loss_num": 0.05078125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 105098888, + "step": 1677 + }, + { + "epoch": 5.584026622296173, + "grad_norm": 23.635780334472656, + "learning_rate": 5e-06, + "loss": 0.6477, + "num_input_tokens_seen": 105160692, + "step": 1678 + }, + { + "epoch": 5.584026622296173, + "loss": 0.745747447013855, + "loss_ce": 0.0005082354764454067, + "loss_iou": 0.271484375, + "loss_num": 0.0400390625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 105160692, + "step": 1678 + }, + { + "epoch": 5.587354409317804, + "grad_norm": 17.890554428100586, + "learning_rate": 5e-06, + "loss": 0.8851, + "num_input_tokens_seen": 105224932, + "step": 1679 + }, + { + "epoch": 5.587354409317804, + "loss": 0.6944231986999512, + "loss_ce": 0.0005755070014856756, + "loss_iou": 0.2890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 105224932, + "step": 1679 + }, + { + "epoch": 5.590682196339435, + "grad_norm": 36.01473617553711, + "learning_rate": 5e-06, + "loss": 0.5336, + "num_input_tokens_seen": 105287516, + "step": 1680 + }, + { + "epoch": 5.590682196339435, + "loss": 0.4492305517196655, + "loss_ce": 1.179968148790067e-05, + "loss_iou": 0.13671875, + "loss_num": 0.03515625, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 105287516, + "step": 1680 + }, + { + "epoch": 5.594009983361065, + "grad_norm": 15.766006469726562, + "learning_rate": 5e-06, + "loss": 0.8177, + "num_input_tokens_seen": 105349820, + "step": 1681 + }, + { + "epoch": 5.594009983361065, + "loss": 0.9034368991851807, + "loss_ce": 0.00011656155402306467, + "loss_iou": 0.330078125, + "loss_num": 0.048583984375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 105349820, + "step": 1681 + }, + { + "epoch": 5.597337770382696, + "grad_norm": 10.683774948120117, + "learning_rate": 5e-06, + "loss": 0.596, + "num_input_tokens_seen": 105411476, + "step": 1682 + }, + { + "epoch": 5.597337770382696, + "loss": 0.8543940186500549, + "loss_ce": 0.000634245399851352, + "loss_iou": 0.291015625, + "loss_num": 0.0546875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 105411476, + "step": 1682 + }, + { + "epoch": 5.6006655574043265, + "grad_norm": 15.730971336364746, + "learning_rate": 5e-06, + "loss": 0.7665, + "num_input_tokens_seen": 105473888, + "step": 1683 + }, + { + "epoch": 5.6006655574043265, + "loss": 0.8576712012290955, + "loss_ce": 5.173462795937667e-06, + "loss_iou": 0.28515625, + "loss_num": 0.057861328125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 105473888, + "step": 1683 + }, + { + "epoch": 5.603993344425957, + "grad_norm": 17.79537582397461, + "learning_rate": 5e-06, + "loss": 0.7979, + "num_input_tokens_seen": 105537972, + "step": 1684 + }, + { + "epoch": 5.603993344425957, + "loss": 0.8166719675064087, + "loss_ce": 0.00026570551563054323, + "loss_iou": 0.279296875, + "loss_num": 0.051513671875, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 105537972, + "step": 1684 + }, + { + "epoch": 5.607321131447588, + "grad_norm": 15.09589672088623, + "learning_rate": 5e-06, + "loss": 0.6113, + "num_input_tokens_seen": 105600116, + "step": 1685 + }, + { + "epoch": 5.607321131447588, + "loss": 0.7013496160507202, + "loss_ce": 5.5670123401796445e-05, + "loss_iou": 0.1875, + "loss_num": 0.0654296875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 105600116, + "step": 1685 + }, + { + "epoch": 5.610648918469218, + "grad_norm": 9.822373390197754, + "learning_rate": 5e-06, + "loss": 0.5208, + "num_input_tokens_seen": 105661696, + "step": 1686 + }, + { + "epoch": 5.610648918469218, + "loss": 0.4002895951271057, + "loss_ce": 0.00044828990940004587, + "loss_iou": 0.1044921875, + "loss_num": 0.0380859375, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 105661696, + "step": 1686 + }, + { + "epoch": 5.613976705490849, + "grad_norm": 12.23663330078125, + "learning_rate": 5e-06, + "loss": 0.8393, + "num_input_tokens_seen": 105724768, + "step": 1687 + }, + { + "epoch": 5.613976705490849, + "loss": 0.684592604637146, + "loss_ce": 2.223967749159783e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.048095703125, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 105724768, + "step": 1687 + }, + { + "epoch": 5.6173044925124795, + "grad_norm": 38.26483917236328, + "learning_rate": 5e-06, + "loss": 0.5627, + "num_input_tokens_seen": 105787560, + "step": 1688 + }, + { + "epoch": 5.6173044925124795, + "loss": 0.44944727420806885, + "loss_ce": 0.0004726658225990832, + "loss_iou": 0.150390625, + "loss_num": 0.0296630859375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 105787560, + "step": 1688 + }, + { + "epoch": 5.62063227953411, + "grad_norm": 17.305635452270508, + "learning_rate": 5e-06, + "loss": 0.6798, + "num_input_tokens_seen": 105851016, + "step": 1689 + }, + { + "epoch": 5.62063227953411, + "loss": 0.553048849105835, + "loss_ce": 9.341451004729606e-06, + "loss_iou": 0.20703125, + "loss_num": 0.02783203125, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 105851016, + "step": 1689 + }, + { + "epoch": 5.623960066555741, + "grad_norm": 19.972763061523438, + "learning_rate": 5e-06, + "loss": 0.7356, + "num_input_tokens_seen": 105912112, + "step": 1690 + }, + { + "epoch": 5.623960066555741, + "loss": 0.8830522298812866, + "loss_ce": 0.00023976643569767475, + "loss_iou": 0.310546875, + "loss_num": 0.05224609375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 105912112, + "step": 1690 + }, + { + "epoch": 5.627287853577371, + "grad_norm": 66.9946060180664, + "learning_rate": 5e-06, + "loss": 0.8263, + "num_input_tokens_seen": 105975356, + "step": 1691 + }, + { + "epoch": 5.627287853577371, + "loss": 0.643821120262146, + "loss_ce": 2.2260259356698953e-05, + "loss_iou": 0.2333984375, + "loss_num": 0.035400390625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 105975356, + "step": 1691 + }, + { + "epoch": 5.630615640599002, + "grad_norm": 33.13805389404297, + "learning_rate": 5e-06, + "loss": 0.6637, + "num_input_tokens_seen": 106037320, + "step": 1692 + }, + { + "epoch": 5.630615640599002, + "loss": 0.5694460272789001, + "loss_ce": 0.00035422114888206124, + "loss_iou": 0.2294921875, + "loss_num": 0.022216796875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 106037320, + "step": 1692 + }, + { + "epoch": 5.633943427620633, + "grad_norm": 15.999832153320312, + "learning_rate": 5e-06, + "loss": 0.7618, + "num_input_tokens_seen": 106100968, + "step": 1693 + }, + { + "epoch": 5.633943427620633, + "loss": 0.5870428085327148, + "loss_ce": 0.0003728655865415931, + "loss_iou": 0.208984375, + "loss_num": 0.03369140625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 106100968, + "step": 1693 + }, + { + "epoch": 5.637271214642263, + "grad_norm": 22.809717178344727, + "learning_rate": 5e-06, + "loss": 0.5141, + "num_input_tokens_seen": 106163984, + "step": 1694 + }, + { + "epoch": 5.637271214642263, + "loss": 0.5070875883102417, + "loss_ce": 7.488038590963697e-06, + "loss_iou": 0.1787109375, + "loss_num": 0.0301513671875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 106163984, + "step": 1694 + }, + { + "epoch": 5.640599001663894, + "grad_norm": 10.32789134979248, + "learning_rate": 5e-06, + "loss": 0.4488, + "num_input_tokens_seen": 106225564, + "step": 1695 + }, + { + "epoch": 5.640599001663894, + "loss": 0.332084059715271, + "loss_ce": 7.034345344436588e-06, + "loss_iou": 0.1103515625, + "loss_num": 0.022216796875, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 106225564, + "step": 1695 + }, + { + "epoch": 5.643926788685524, + "grad_norm": 7.523070335388184, + "learning_rate": 5e-06, + "loss": 0.6172, + "num_input_tokens_seen": 106287900, + "step": 1696 + }, + { + "epoch": 5.643926788685524, + "loss": 0.775996208190918, + "loss_ce": 0.00023933117336127907, + "loss_iou": 0.27734375, + "loss_num": 0.04443359375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 106287900, + "step": 1696 + }, + { + "epoch": 5.647254575707155, + "grad_norm": 15.663951873779297, + "learning_rate": 5e-06, + "loss": 0.4675, + "num_input_tokens_seen": 106348884, + "step": 1697 + }, + { + "epoch": 5.647254575707155, + "loss": 0.4975832402706146, + "loss_ce": 2.462265911162831e-05, + "loss_iou": 0.169921875, + "loss_num": 0.031494140625, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 106348884, + "step": 1697 + }, + { + "epoch": 5.650582362728786, + "grad_norm": 9.897237777709961, + "learning_rate": 5e-06, + "loss": 0.7378, + "num_input_tokens_seen": 106411368, + "step": 1698 + }, + { + "epoch": 5.650582362728786, + "loss": 0.7313656806945801, + "loss_ce": 0.0006527729565277696, + "loss_iou": 0.244140625, + "loss_num": 0.04833984375, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 106411368, + "step": 1698 + }, + { + "epoch": 5.653910149750416, + "grad_norm": 27.847606658935547, + "learning_rate": 5e-06, + "loss": 0.7811, + "num_input_tokens_seen": 106473388, + "step": 1699 + }, + { + "epoch": 5.653910149750416, + "loss": 1.0008139610290527, + "loss_ce": 0.0003256350464653224, + "loss_iou": 0.32421875, + "loss_num": 0.07080078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 106473388, + "step": 1699 + }, + { + "epoch": 5.657237936772047, + "grad_norm": 11.24642276763916, + "learning_rate": 5e-06, + "loss": 0.8014, + "num_input_tokens_seen": 106537688, + "step": 1700 + }, + { + "epoch": 5.657237936772047, + "loss": 0.9711390137672424, + "loss_ce": 0.0006800297414883971, + "loss_iou": 0.326171875, + "loss_num": 0.0634765625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 106537688, + "step": 1700 + }, + { + "epoch": 5.6605657237936775, + "grad_norm": 14.969793319702148, + "learning_rate": 5e-06, + "loss": 0.7234, + "num_input_tokens_seen": 106600456, + "step": 1701 + }, + { + "epoch": 5.6605657237936775, + "loss": 0.5358263254165649, + "loss_ce": 0.000670083099976182, + "loss_iou": 0.1923828125, + "loss_num": 0.030029296875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 106600456, + "step": 1701 + }, + { + "epoch": 5.663893510815308, + "grad_norm": 12.321464538574219, + "learning_rate": 5e-06, + "loss": 0.5965, + "num_input_tokens_seen": 106662656, + "step": 1702 + }, + { + "epoch": 5.663893510815308, + "loss": 0.7050811052322388, + "loss_ce": 0.000857477483805269, + "loss_iou": 0.205078125, + "loss_num": 0.058837890625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 106662656, + "step": 1702 + }, + { + "epoch": 5.667221297836939, + "grad_norm": 17.309473037719727, + "learning_rate": 5e-06, + "loss": 0.8267, + "num_input_tokens_seen": 106726504, + "step": 1703 + }, + { + "epoch": 5.667221297836939, + "loss": 0.8586465120315552, + "loss_ce": 3.938740519515704e-06, + "loss_iou": 0.330078125, + "loss_num": 0.039306640625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 106726504, + "step": 1703 + }, + { + "epoch": 5.670549084858569, + "grad_norm": 21.560821533203125, + "learning_rate": 5e-06, + "loss": 0.6404, + "num_input_tokens_seen": 106789148, + "step": 1704 + }, + { + "epoch": 5.670549084858569, + "loss": 0.4272652268409729, + "loss_ce": 1.9131181034026667e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.0294189453125, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 106789148, + "step": 1704 + }, + { + "epoch": 5.6738768718802, + "grad_norm": 68.77386474609375, + "learning_rate": 5e-06, + "loss": 0.7511, + "num_input_tokens_seen": 106851232, + "step": 1705 + }, + { + "epoch": 5.6738768718802, + "loss": 0.5564388632774353, + "loss_ce": 4.237954271957278e-05, + "loss_iou": 0.166015625, + "loss_num": 0.044921875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 106851232, + "step": 1705 + }, + { + "epoch": 5.677204658901831, + "grad_norm": 9.309255599975586, + "learning_rate": 5e-06, + "loss": 0.6206, + "num_input_tokens_seen": 106914752, + "step": 1706 + }, + { + "epoch": 5.677204658901831, + "loss": 0.5360674858093262, + "loss_ce": 5.677162334905006e-05, + "loss_iou": 0.212890625, + "loss_num": 0.022216796875, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 106914752, + "step": 1706 + }, + { + "epoch": 5.680532445923461, + "grad_norm": 9.004875183105469, + "learning_rate": 5e-06, + "loss": 0.5238, + "num_input_tokens_seen": 106976952, + "step": 1707 + }, + { + "epoch": 5.680532445923461, + "loss": 0.45058244466781616, + "loss_ce": 0.00014298340829554945, + "loss_iou": 0.1279296875, + "loss_num": 0.038818359375, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 106976952, + "step": 1707 + }, + { + "epoch": 5.683860232945092, + "grad_norm": 12.503425598144531, + "learning_rate": 5e-06, + "loss": 0.6518, + "num_input_tokens_seen": 107039528, + "step": 1708 + }, + { + "epoch": 5.683860232945092, + "loss": 0.7054813504219055, + "loss_ce": 0.000281153799733147, + "loss_iou": 0.2236328125, + "loss_num": 0.0517578125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 107039528, + "step": 1708 + }, + { + "epoch": 5.687188019966722, + "grad_norm": 123.16822052001953, + "learning_rate": 5e-06, + "loss": 0.6203, + "num_input_tokens_seen": 107103448, + "step": 1709 + }, + { + "epoch": 5.687188019966722, + "loss": 0.49697205424308777, + "loss_ce": 0.00014590048522222787, + "loss_iou": 0.1650390625, + "loss_num": 0.033203125, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 107103448, + "step": 1709 + }, + { + "epoch": 5.690515806988353, + "grad_norm": 19.90471839904785, + "learning_rate": 5e-06, + "loss": 0.6721, + "num_input_tokens_seen": 107165844, + "step": 1710 + }, + { + "epoch": 5.690515806988353, + "loss": 0.703316330909729, + "loss_ce": 0.00019130959117319435, + "loss_iou": 0.2138671875, + "loss_num": 0.05517578125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 107165844, + "step": 1710 + }, + { + "epoch": 5.693843594009984, + "grad_norm": 26.4517765045166, + "learning_rate": 5e-06, + "loss": 0.7115, + "num_input_tokens_seen": 107228880, + "step": 1711 + }, + { + "epoch": 5.693843594009984, + "loss": 0.5219588279724121, + "loss_ce": 0.0009016690892167389, + "loss_iou": 0.1787109375, + "loss_num": 0.032958984375, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 107228880, + "step": 1711 + }, + { + "epoch": 5.697171381031614, + "grad_norm": 24.016637802124023, + "learning_rate": 5e-06, + "loss": 0.6032, + "num_input_tokens_seen": 107291652, + "step": 1712 + }, + { + "epoch": 5.697171381031614, + "loss": 0.6287029385566711, + "loss_ce": 0.0006511926185339689, + "loss_iou": 0.1416015625, + "loss_num": 0.06884765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 107291652, + "step": 1712 + }, + { + "epoch": 5.700499168053245, + "grad_norm": 12.63536548614502, + "learning_rate": 5e-06, + "loss": 0.5321, + "num_input_tokens_seen": 107353984, + "step": 1713 + }, + { + "epoch": 5.700499168053245, + "loss": 0.6348440647125244, + "loss_ce": 0.001055013621225953, + "loss_iou": 0.2236328125, + "loss_num": 0.037109375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 107353984, + "step": 1713 + }, + { + "epoch": 5.7038269550748755, + "grad_norm": 28.98735237121582, + "learning_rate": 5e-06, + "loss": 0.8102, + "num_input_tokens_seen": 107417396, + "step": 1714 + }, + { + "epoch": 5.7038269550748755, + "loss": 0.7450417280197144, + "loss_ce": 0.00016867804515641183, + "loss_iou": 0.265625, + "loss_num": 0.042724609375, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 107417396, + "step": 1714 + }, + { + "epoch": 5.707154742096506, + "grad_norm": 12.304791450500488, + "learning_rate": 5e-06, + "loss": 0.4742, + "num_input_tokens_seen": 107480376, + "step": 1715 + }, + { + "epoch": 5.707154742096506, + "loss": 0.4631630778312683, + "loss_ce": 8.936785161495209e-05, + "loss_iou": 0.1513671875, + "loss_num": 0.0322265625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 107480376, + "step": 1715 + }, + { + "epoch": 5.710482529118137, + "grad_norm": 14.445513725280762, + "learning_rate": 5e-06, + "loss": 0.8061, + "num_input_tokens_seen": 107544188, + "step": 1716 + }, + { + "epoch": 5.710482529118137, + "loss": 0.8209518194198608, + "loss_ce": 0.0010055896127596498, + "loss_iou": 0.2470703125, + "loss_num": 0.06494140625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 107544188, + "step": 1716 + }, + { + "epoch": 5.713810316139767, + "grad_norm": 8.823054313659668, + "learning_rate": 5e-06, + "loss": 0.4795, + "num_input_tokens_seen": 107605148, + "step": 1717 + }, + { + "epoch": 5.713810316139767, + "loss": 0.4528508484363556, + "loss_ce": 0.0004582681867759675, + "loss_iou": 0.078125, + "loss_num": 0.059326171875, + "loss_xval": 0.453125, + "num_input_tokens_seen": 107605148, + "step": 1717 + }, + { + "epoch": 5.717138103161398, + "grad_norm": 16.8961181640625, + "learning_rate": 5e-06, + "loss": 0.4838, + "num_input_tokens_seen": 107668080, + "step": 1718 + }, + { + "epoch": 5.717138103161398, + "loss": 0.4450841546058655, + "loss_ce": 0.0009313120390288532, + "loss_iou": 0.15234375, + "loss_num": 0.0281982421875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 107668080, + "step": 1718 + }, + { + "epoch": 5.7204658901830285, + "grad_norm": 9.961505889892578, + "learning_rate": 5e-06, + "loss": 0.341, + "num_input_tokens_seen": 107729940, + "step": 1719 + }, + { + "epoch": 5.7204658901830285, + "loss": 0.3053891658782959, + "loss_ce": 9.12985997274518e-05, + "loss_iou": 0.078125, + "loss_num": 0.0299072265625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 107729940, + "step": 1719 + }, + { + "epoch": 5.723793677204659, + "grad_norm": 11.72249984741211, + "learning_rate": 5e-06, + "loss": 0.6196, + "num_input_tokens_seen": 107791916, + "step": 1720 + }, + { + "epoch": 5.723793677204659, + "loss": 0.8364963531494141, + "loss_ce": 0.0005588560015894473, + "loss_iou": 0.314453125, + "loss_num": 0.04150390625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 107791916, + "step": 1720 + }, + { + "epoch": 5.72712146422629, + "grad_norm": 22.064348220825195, + "learning_rate": 5e-06, + "loss": 0.6433, + "num_input_tokens_seen": 107855036, + "step": 1721 + }, + { + "epoch": 5.72712146422629, + "loss": 0.5307466983795166, + "loss_ce": 0.0015108881052583456, + "loss_iou": 0.13671875, + "loss_num": 0.05126953125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 107855036, + "step": 1721 + }, + { + "epoch": 5.73044925124792, + "grad_norm": 9.683394432067871, + "learning_rate": 5e-06, + "loss": 0.6907, + "num_input_tokens_seen": 107918084, + "step": 1722 + }, + { + "epoch": 5.73044925124792, + "loss": 0.5834569931030273, + "loss_ce": 0.00038815615698695183, + "loss_iou": 0.1796875, + "loss_num": 0.044921875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 107918084, + "step": 1722 + }, + { + "epoch": 5.733777038269551, + "grad_norm": 6.485795974731445, + "learning_rate": 5e-06, + "loss": 0.4933, + "num_input_tokens_seen": 107980216, + "step": 1723 + }, + { + "epoch": 5.733777038269551, + "loss": 0.6450976133346558, + "loss_ce": 0.0006884537287987769, + "loss_iou": 0.1630859375, + "loss_num": 0.06396484375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 107980216, + "step": 1723 + }, + { + "epoch": 5.737104825291182, + "grad_norm": 17.993772506713867, + "learning_rate": 5e-06, + "loss": 0.7217, + "num_input_tokens_seen": 108041460, + "step": 1724 + }, + { + "epoch": 5.737104825291182, + "loss": 0.8249725103378296, + "loss_ce": 8.237032307079062e-05, + "loss_iou": 0.298828125, + "loss_num": 0.04541015625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 108041460, + "step": 1724 + }, + { + "epoch": 5.740432612312812, + "grad_norm": 23.316713333129883, + "learning_rate": 5e-06, + "loss": 0.624, + "num_input_tokens_seen": 108104100, + "step": 1725 + }, + { + "epoch": 5.740432612312812, + "loss": 0.7806700468063354, + "loss_ce": 0.002044552704319358, + "loss_iou": 0.2314453125, + "loss_num": 0.06298828125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 108104100, + "step": 1725 + }, + { + "epoch": 5.743760399334443, + "grad_norm": 14.220916748046875, + "learning_rate": 5e-06, + "loss": 0.8526, + "num_input_tokens_seen": 108166016, + "step": 1726 + }, + { + "epoch": 5.743760399334443, + "loss": 0.9530130624771118, + "loss_ce": 1.0050905984826386e-05, + "loss_iou": 0.31640625, + "loss_num": 0.06396484375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 108166016, + "step": 1726 + }, + { + "epoch": 5.747088186356073, + "grad_norm": 19.730939865112305, + "learning_rate": 5e-06, + "loss": 0.7547, + "num_input_tokens_seen": 108228780, + "step": 1727 + }, + { + "epoch": 5.747088186356073, + "loss": 1.039328932762146, + "loss_ce": 0.0002664237981662154, + "loss_iou": 0.322265625, + "loss_num": 0.0791015625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 108228780, + "step": 1727 + }, + { + "epoch": 5.750415973377704, + "grad_norm": 12.41170597076416, + "learning_rate": 5e-06, + "loss": 0.7916, + "num_input_tokens_seen": 108292028, + "step": 1728 + }, + { + "epoch": 5.750415973377704, + "loss": 0.9140459299087524, + "loss_ce": 0.0005938184331171215, + "loss_iou": 0.287109375, + "loss_num": 0.0673828125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 108292028, + "step": 1728 + }, + { + "epoch": 5.753743760399335, + "grad_norm": 13.194222450256348, + "learning_rate": 5e-06, + "loss": 0.8602, + "num_input_tokens_seen": 108355096, + "step": 1729 + }, + { + "epoch": 5.753743760399335, + "loss": 0.9234409928321838, + "loss_ce": 0.00010111027950188145, + "loss_iou": 0.357421875, + "loss_num": 0.041748046875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 108355096, + "step": 1729 + }, + { + "epoch": 5.757071547420965, + "grad_norm": 20.04388427734375, + "learning_rate": 5e-06, + "loss": 0.7181, + "num_input_tokens_seen": 108417048, + "step": 1730 + }, + { + "epoch": 5.757071547420965, + "loss": 0.9073438048362732, + "loss_ce": 0.0006054791738279164, + "loss_iou": 0.330078125, + "loss_num": 0.04931640625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 108417048, + "step": 1730 + }, + { + "epoch": 5.760399334442596, + "grad_norm": 17.463380813598633, + "learning_rate": 5e-06, + "loss": 0.741, + "num_input_tokens_seen": 108480692, + "step": 1731 + }, + { + "epoch": 5.760399334442596, + "loss": 0.6669446229934692, + "loss_ce": 7.449155964422971e-05, + "loss_iou": 0.224609375, + "loss_num": 0.043701171875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 108480692, + "step": 1731 + }, + { + "epoch": 5.7637271214642265, + "grad_norm": 22.218618392944336, + "learning_rate": 5e-06, + "loss": 0.5948, + "num_input_tokens_seen": 108542816, + "step": 1732 + }, + { + "epoch": 5.7637271214642265, + "loss": 0.4774358868598938, + "loss_ce": 0.000995467184111476, + "loss_iou": 0.150390625, + "loss_num": 0.03515625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 108542816, + "step": 1732 + }, + { + "epoch": 5.767054908485857, + "grad_norm": 12.377209663391113, + "learning_rate": 5e-06, + "loss": 0.4993, + "num_input_tokens_seen": 108606396, + "step": 1733 + }, + { + "epoch": 5.767054908485857, + "loss": 0.5402237176895142, + "loss_ce": 0.00019988913845736533, + "loss_iou": 0.1767578125, + "loss_num": 0.037353515625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 108606396, + "step": 1733 + }, + { + "epoch": 5.770382695507488, + "grad_norm": 9.325462341308594, + "learning_rate": 5e-06, + "loss": 0.8247, + "num_input_tokens_seen": 108668468, + "step": 1734 + }, + { + "epoch": 5.770382695507488, + "loss": 0.9547851085662842, + "loss_ce": 0.00397943751886487, + "loss_iou": 0.302734375, + "loss_num": 0.0693359375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 108668468, + "step": 1734 + }, + { + "epoch": 5.773710482529118, + "grad_norm": 12.919450759887695, + "learning_rate": 5e-06, + "loss": 0.6592, + "num_input_tokens_seen": 108731260, + "step": 1735 + }, + { + "epoch": 5.773710482529118, + "loss": 0.5413807034492493, + "loss_ce": 0.0009144245414063334, + "loss_iou": 0.1748046875, + "loss_num": 0.0380859375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 108731260, + "step": 1735 + }, + { + "epoch": 5.777038269550749, + "grad_norm": 11.477002143859863, + "learning_rate": 5e-06, + "loss": 0.8761, + "num_input_tokens_seen": 108795324, + "step": 1736 + }, + { + "epoch": 5.777038269550749, + "loss": 0.9495025873184204, + "loss_ce": 0.0010161716490983963, + "loss_iou": 0.306640625, + "loss_num": 0.06640625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 108795324, + "step": 1736 + }, + { + "epoch": 5.78036605657238, + "grad_norm": 9.099837303161621, + "learning_rate": 5e-06, + "loss": 0.7722, + "num_input_tokens_seen": 108857560, + "step": 1737 + }, + { + "epoch": 5.78036605657238, + "loss": 0.7459558248519897, + "loss_ce": 0.000472470186650753, + "loss_iou": 0.24609375, + "loss_num": 0.05078125, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 108857560, + "step": 1737 + }, + { + "epoch": 5.78369384359401, + "grad_norm": 14.444173812866211, + "learning_rate": 5e-06, + "loss": 0.4758, + "num_input_tokens_seen": 108918780, + "step": 1738 + }, + { + "epoch": 5.78369384359401, + "loss": 0.4299461543560028, + "loss_ce": 1.4529123291140422e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.03662109375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 108918780, + "step": 1738 + }, + { + "epoch": 5.787021630615641, + "grad_norm": 17.906532287597656, + "learning_rate": 5e-06, + "loss": 0.6279, + "num_input_tokens_seen": 108980980, + "step": 1739 + }, + { + "epoch": 5.787021630615641, + "loss": 0.6438461542129517, + "loss_ce": 0.0005356057081371546, + "loss_iou": 0.2431640625, + "loss_num": 0.031494140625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 108980980, + "step": 1739 + }, + { + "epoch": 5.790349417637271, + "grad_norm": 13.573742866516113, + "learning_rate": 5e-06, + "loss": 0.7242, + "num_input_tokens_seen": 109043940, + "step": 1740 + }, + { + "epoch": 5.790349417637271, + "loss": 0.9529647827148438, + "loss_ce": 0.0005722053465433419, + "loss_iou": 0.333984375, + "loss_num": 0.056884765625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 109043940, + "step": 1740 + }, + { + "epoch": 5.793677204658902, + "grad_norm": 13.186030387878418, + "learning_rate": 5e-06, + "loss": 0.6727, + "num_input_tokens_seen": 109106144, + "step": 1741 + }, + { + "epoch": 5.793677204658902, + "loss": 0.6280597448348999, + "loss_ce": 0.000374168943380937, + "loss_iou": 0.2314453125, + "loss_num": 0.033203125, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 109106144, + "step": 1741 + }, + { + "epoch": 5.797004991680533, + "grad_norm": 11.846780776977539, + "learning_rate": 5e-06, + "loss": 0.5714, + "num_input_tokens_seen": 109169260, + "step": 1742 + }, + { + "epoch": 5.797004991680533, + "loss": 0.6322175860404968, + "loss_ce": 1.547130523249507e-05, + "loss_iou": 0.20703125, + "loss_num": 0.04345703125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 109169260, + "step": 1742 + }, + { + "epoch": 5.800332778702163, + "grad_norm": 17.20931053161621, + "learning_rate": 5e-06, + "loss": 0.6911, + "num_input_tokens_seen": 109232900, + "step": 1743 + }, + { + "epoch": 5.800332778702163, + "loss": 0.5807763338088989, + "loss_ce": 0.00020993230282329023, + "loss_iou": 0.150390625, + "loss_num": 0.05615234375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 109232900, + "step": 1743 + }, + { + "epoch": 5.803660565723794, + "grad_norm": 17.745994567871094, + "learning_rate": 5e-06, + "loss": 0.7031, + "num_input_tokens_seen": 109295748, + "step": 1744 + }, + { + "epoch": 5.803660565723794, + "loss": 0.8015212416648865, + "loss_ce": 7.5699854278354906e-06, + "loss_iou": 0.29296875, + "loss_num": 0.04296875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 109295748, + "step": 1744 + }, + { + "epoch": 5.8069883527454245, + "grad_norm": 8.905264854431152, + "learning_rate": 5e-06, + "loss": 0.4016, + "num_input_tokens_seen": 109358312, + "step": 1745 + }, + { + "epoch": 5.8069883527454245, + "loss": 0.39209282398223877, + "loss_ce": 2.9924158297944814e-06, + "loss_iou": 0.109375, + "loss_num": 0.03466796875, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 109358312, + "step": 1745 + }, + { + "epoch": 5.810316139767055, + "grad_norm": 5.492832660675049, + "learning_rate": 5e-06, + "loss": 0.593, + "num_input_tokens_seen": 109421796, + "step": 1746 + }, + { + "epoch": 5.810316139767055, + "loss": 0.4544283151626587, + "loss_ce": 0.00014364512753672898, + "loss_iou": 0.150390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 109421796, + "step": 1746 + }, + { + "epoch": 5.813643926788686, + "grad_norm": 13.455323219299316, + "learning_rate": 5e-06, + "loss": 0.8716, + "num_input_tokens_seen": 109485112, + "step": 1747 + }, + { + "epoch": 5.813643926788686, + "loss": 0.9304600954055786, + "loss_ce": 0.0002843089459929615, + "loss_iou": 0.337890625, + "loss_num": 0.05078125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 109485112, + "step": 1747 + }, + { + "epoch": 5.816971713810316, + "grad_norm": 17.337217330932617, + "learning_rate": 5e-06, + "loss": 0.6384, + "num_input_tokens_seen": 109547828, + "step": 1748 + }, + { + "epoch": 5.816971713810316, + "loss": 0.8259046673774719, + "loss_ce": 0.00046522487537004054, + "loss_iou": 0.298828125, + "loss_num": 0.045654296875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 109547828, + "step": 1748 + }, + { + "epoch": 5.820299500831947, + "grad_norm": 8.980031967163086, + "learning_rate": 5e-06, + "loss": 0.5732, + "num_input_tokens_seen": 109610380, + "step": 1749 + }, + { + "epoch": 5.820299500831947, + "loss": 0.4640018939971924, + "loss_ce": 1.261913166672457e-05, + "loss_iou": 0.15625, + "loss_num": 0.0302734375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 109610380, + "step": 1749 + }, + { + "epoch": 5.8236272878535775, + "grad_norm": 13.945286750793457, + "learning_rate": 5e-06, + "loss": 0.5297, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "eval_seeclick_CIoU": 0.10236945003271103, + "eval_seeclick_GIoU": 0.11712978780269623, + "eval_seeclick_IoU": 0.2036900669336319, + "eval_seeclick_MAE_all": 0.16942255944013596, + "eval_seeclick_MAE_h": 0.049304405227303505, + "eval_seeclick_MAE_w": 0.12896203249692917, + "eval_seeclick_MAE_x_boxes": 0.23738989233970642, + "eval_seeclick_MAE_y_boxes": 0.1408754587173462, + "eval_seeclick_NUM_probability": 0.9998235106468201, + "eval_seeclick_inside_bbox": 0.20937500149011612, + "eval_seeclick_loss": 2.7325868606567383, + "eval_seeclick_loss_ce": 0.11027439311146736, + "eval_seeclick_loss_iou": 0.889892578125, + "eval_seeclick_loss_num": 0.16834259033203125, + "eval_seeclick_loss_xval": 2.6220703125, + "eval_seeclick_runtime": 67.9027, + "eval_seeclick_samples_per_second": 0.692, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "eval_icons_CIoU": 0.04104041517712176, + "eval_icons_GIoU": 0.1604958102107048, + "eval_icons_IoU": 0.19673043489456177, + "eval_icons_MAE_all": 0.14254845678806305, + "eval_icons_MAE_h": 0.05420855712145567, + "eval_icons_MAE_w": 0.15784180164337158, + "eval_icons_MAE_x_boxes": 0.15393128246068954, + "eval_icons_MAE_y_boxes": 0.047592333517968655, + "eval_icons_NUM_probability": 0.9999956786632538, + "eval_icons_inside_bbox": 0.3420138955116272, + "eval_icons_loss": 2.3909385204315186, + "eval_icons_loss_ce": 9.011300790007226e-07, + "eval_icons_loss_iou": 0.824462890625, + "eval_icons_loss_num": 0.14644622802734375, + "eval_icons_loss_xval": 2.3818359375, + "eval_icons_runtime": 68.4022, + "eval_icons_samples_per_second": 0.731, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "eval_screenspot_CIoU": 0.06936451761672895, + "eval_screenspot_GIoU": 0.13237376511096954, + "eval_screenspot_IoU": 0.21043485403060913, + "eval_screenspot_MAE_all": 0.18726058304309845, + "eval_screenspot_MAE_h": 0.07297260562578838, + "eval_screenspot_MAE_w": 0.14615009228388467, + "eval_screenspot_MAE_x_boxes": 0.25001437962055206, + "eval_screenspot_MAE_y_boxes": 0.13508301973342896, + "eval_screenspot_NUM_probability": 0.9999534090360006, + "eval_screenspot_inside_bbox": 0.3625000019868215, + "eval_screenspot_loss": 2.6945841312408447, + "eval_screenspot_loss_ce": 5.838420596167756e-05, + "eval_screenspot_loss_iou": 0.88134765625, + "eval_screenspot_loss_num": 0.194793701171875, + "eval_screenspot_loss_xval": 2.7360026041666665, + "eval_screenspot_runtime": 119.8843, + "eval_screenspot_samples_per_second": 0.742, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "eval_compot_CIoU": 0.003425128757953644, + "eval_compot_GIoU": 0.11101753637194633, + "eval_compot_IoU": 0.17780261486768723, + "eval_compot_MAE_all": 0.2019421085715294, + "eval_compot_MAE_h": 0.07107486762106419, + "eval_compot_MAE_w": 0.2705545723438263, + "eval_compot_MAE_x_boxes": 0.20419960469007492, + "eval_compot_MAE_y_boxes": 0.08390221372246742, + "eval_compot_NUM_probability": 0.9999839067459106, + "eval_compot_inside_bbox": 0.3107638955116272, + "eval_compot_loss": 2.85368275642395, + "eval_compot_loss_ce": 0.004224188975058496, + "eval_compot_loss_iou": 0.915283203125, + "eval_compot_loss_num": 0.2141571044921875, + "eval_compot_loss_xval": 2.9033203125, + "eval_compot_runtime": 72.3216, + "eval_compot_samples_per_second": 0.691, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "eval_custom_ui_MAE_all": 0.08030644804239273, + "eval_custom_ui_MAE_x": 0.07421864569187164, + "eval_custom_ui_MAE_y": 0.08639425411820412, + "eval_custom_ui_NUM_probability": 0.999993622303009, + "eval_custom_ui_loss": 0.3886740803718567, + "eval_custom_ui_loss_ce": 2.7836218805532553e-06, + "eval_custom_ui_loss_num": 0.074920654296875, + "eval_custom_ui_loss_xval": 0.3746337890625, + "eval_custom_ui_runtime": 53.634, + "eval_custom_ui_samples_per_second": 0.932, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.8236272878535775, + "loss": 0.3675568401813507, + "loss_ce": 3.1467793633055408e-06, + "loss_iou": 0.0, + "loss_num": 0.0732421875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 109672416, + "step": 1750 + }, + { + "epoch": 5.826955074875208, + "grad_norm": 9.24660873413086, + "learning_rate": 5e-06, + "loss": 0.8293, + "num_input_tokens_seen": 109735632, + "step": 1751 + }, + { + "epoch": 5.826955074875208, + "loss": 0.6466149091720581, + "loss_ce": 0.0006188148399814963, + "loss_iou": 0.2197265625, + "loss_num": 0.04150390625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 109735632, + "step": 1751 + }, + { + "epoch": 5.830282861896839, + "grad_norm": 8.810318946838379, + "learning_rate": 5e-06, + "loss": 0.7299, + "num_input_tokens_seen": 109799360, + "step": 1752 + }, + { + "epoch": 5.830282861896839, + "loss": 0.7848711013793945, + "loss_ce": 0.0011797224869951606, + "loss_iou": 0.3125, + "loss_num": 0.031494140625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 109799360, + "step": 1752 + }, + { + "epoch": 5.833610648918469, + "grad_norm": 10.353952407836914, + "learning_rate": 5e-06, + "loss": 0.6097, + "num_input_tokens_seen": 109860312, + "step": 1753 + }, + { + "epoch": 5.833610648918469, + "loss": 0.6687078475952148, + "loss_ce": 6.68507709633559e-06, + "loss_iou": 0.193359375, + "loss_num": 0.056396484375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 109860312, + "step": 1753 + }, + { + "epoch": 5.8369384359401, + "grad_norm": 37.4395866394043, + "learning_rate": 5e-06, + "loss": 0.6346, + "num_input_tokens_seen": 109921664, + "step": 1754 + }, + { + "epoch": 5.8369384359401, + "loss": 0.6058779358863831, + "loss_ce": 0.0012636694591492414, + "loss_iou": 0.2265625, + "loss_num": 0.0302734375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 109921664, + "step": 1754 + }, + { + "epoch": 5.840266222961731, + "grad_norm": 18.188020706176758, + "learning_rate": 5e-06, + "loss": 0.5968, + "num_input_tokens_seen": 109983676, + "step": 1755 + }, + { + "epoch": 5.840266222961731, + "loss": 0.5723915100097656, + "loss_ce": 0.003986367955803871, + "loss_iou": 0.1904296875, + "loss_num": 0.037841796875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 109983676, + "step": 1755 + }, + { + "epoch": 5.843594009983361, + "grad_norm": 17.082475662231445, + "learning_rate": 5e-06, + "loss": 0.6797, + "num_input_tokens_seen": 110046380, + "step": 1756 + }, + { + "epoch": 5.843594009983361, + "loss": 0.4576748311519623, + "loss_ce": 0.0001553079637233168, + "loss_iou": 0.158203125, + "loss_num": 0.0281982421875, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 110046380, + "step": 1756 + }, + { + "epoch": 5.846921797004992, + "grad_norm": 9.511809349060059, + "learning_rate": 5e-06, + "loss": 0.525, + "num_input_tokens_seen": 110109532, + "step": 1757 + }, + { + "epoch": 5.846921797004992, + "loss": 0.4188133776187897, + "loss_ce": 0.00011222133616684005, + "loss_iou": 0.10888671875, + "loss_num": 0.040283203125, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 110109532, + "step": 1757 + }, + { + "epoch": 5.850249584026622, + "grad_norm": 11.285202026367188, + "learning_rate": 5e-06, + "loss": 0.5777, + "num_input_tokens_seen": 110172300, + "step": 1758 + }, + { + "epoch": 5.850249584026622, + "loss": 0.657119631767273, + "loss_ce": 1.5172716302913614e-05, + "loss_iou": 0.21875, + "loss_num": 0.0439453125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 110172300, + "step": 1758 + }, + { + "epoch": 5.853577371048253, + "grad_norm": 13.522523880004883, + "learning_rate": 5e-06, + "loss": 0.4297, + "num_input_tokens_seen": 110235616, + "step": 1759 + }, + { + "epoch": 5.853577371048253, + "loss": 0.5291931629180908, + "loss_ce": 1.8332864783587866e-05, + "loss_iou": 0.1806640625, + "loss_num": 0.03369140625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 110235616, + "step": 1759 + }, + { + "epoch": 5.856905158069884, + "grad_norm": 10.221686363220215, + "learning_rate": 5e-06, + "loss": 0.3553, + "num_input_tokens_seen": 110296840, + "step": 1760 + }, + { + "epoch": 5.856905158069884, + "loss": 0.4893026351928711, + "loss_ce": 0.00016688673349563032, + "loss_iou": 0.123046875, + "loss_num": 0.048583984375, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 110296840, + "step": 1760 + }, + { + "epoch": 5.860232945091514, + "grad_norm": 21.090612411499023, + "learning_rate": 5e-06, + "loss": 0.6718, + "num_input_tokens_seen": 110359480, + "step": 1761 + }, + { + "epoch": 5.860232945091514, + "loss": 0.7827401161193848, + "loss_ce": 2.530876372475177e-05, + "loss_iou": 0.306640625, + "loss_num": 0.033935546875, + "loss_xval": 0.78125, + "num_input_tokens_seen": 110359480, + "step": 1761 + }, + { + "epoch": 5.863560732113145, + "grad_norm": 25.800317764282227, + "learning_rate": 5e-06, + "loss": 0.764, + "num_input_tokens_seen": 110421012, + "step": 1762 + }, + { + "epoch": 5.863560732113145, + "loss": 0.8109021186828613, + "loss_ce": 0.0003552451671566814, + "loss_iou": 0.27734375, + "loss_num": 0.05126953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 110421012, + "step": 1762 + }, + { + "epoch": 5.8668885191347755, + "grad_norm": 13.869083404541016, + "learning_rate": 5e-06, + "loss": 0.6566, + "num_input_tokens_seen": 110484348, + "step": 1763 + }, + { + "epoch": 5.8668885191347755, + "loss": 0.7028859257698059, + "loss_ce": 5.098950623505516e-06, + "loss_iou": 0.2177734375, + "loss_num": 0.053466796875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 110484348, + "step": 1763 + }, + { + "epoch": 5.870216306156406, + "grad_norm": 11.48287296295166, + "learning_rate": 5e-06, + "loss": 0.7074, + "num_input_tokens_seen": 110547872, + "step": 1764 + }, + { + "epoch": 5.870216306156406, + "loss": 0.8577039241790771, + "loss_ce": 0.0011365680256858468, + "loss_iou": 0.32421875, + "loss_num": 0.042236328125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 110547872, + "step": 1764 + }, + { + "epoch": 5.873544093178037, + "grad_norm": 14.099336624145508, + "learning_rate": 5e-06, + "loss": 0.6525, + "num_input_tokens_seen": 110611396, + "step": 1765 + }, + { + "epoch": 5.873544093178037, + "loss": 0.7531905174255371, + "loss_ce": 0.0022139903157949448, + "loss_iou": 0.236328125, + "loss_num": 0.0556640625, + "loss_xval": 0.75, + "num_input_tokens_seen": 110611396, + "step": 1765 + }, + { + "epoch": 5.876871880199667, + "grad_norm": 17.61534309387207, + "learning_rate": 5e-06, + "loss": 0.8037, + "num_input_tokens_seen": 110673936, + "step": 1766 + }, + { + "epoch": 5.876871880199667, + "loss": 0.6117116808891296, + "loss_ce": 1.73293192347046e-05, + "loss_iou": 0.21875, + "loss_num": 0.034912109375, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 110673936, + "step": 1766 + }, + { + "epoch": 5.880199667221298, + "grad_norm": 31.511371612548828, + "learning_rate": 5e-06, + "loss": 0.7188, + "num_input_tokens_seen": 110737000, + "step": 1767 + }, + { + "epoch": 5.880199667221298, + "loss": 1.0573480129241943, + "loss_ce": 0.0015619065379723907, + "loss_iou": 0.32421875, + "loss_num": 0.08154296875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 110737000, + "step": 1767 + }, + { + "epoch": 5.883527454242929, + "grad_norm": 35.806556701660156, + "learning_rate": 5e-06, + "loss": 0.505, + "num_input_tokens_seen": 110800016, + "step": 1768 + }, + { + "epoch": 5.883527454242929, + "loss": 0.48276591300964355, + "loss_ce": 0.0013205973664298654, + "loss_iou": 0.15625, + "loss_num": 0.033935546875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 110800016, + "step": 1768 + }, + { + "epoch": 5.886855241264559, + "grad_norm": 25.79120635986328, + "learning_rate": 5e-06, + "loss": 0.5601, + "num_input_tokens_seen": 110864032, + "step": 1769 + }, + { + "epoch": 5.886855241264559, + "loss": 0.5003718137741089, + "loss_ce": 0.0004939304199069738, + "loss_iou": 0.1640625, + "loss_num": 0.034423828125, + "loss_xval": 0.5, + "num_input_tokens_seen": 110864032, + "step": 1769 + }, + { + "epoch": 5.89018302828619, + "grad_norm": 9.691274642944336, + "learning_rate": 5e-06, + "loss": 0.5045, + "num_input_tokens_seen": 110925816, + "step": 1770 + }, + { + "epoch": 5.89018302828619, + "loss": 0.4838283658027649, + "loss_ce": 3.321943586342968e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.484375, + "num_input_tokens_seen": 110925816, + "step": 1770 + }, + { + "epoch": 5.89351081530782, + "grad_norm": 12.821023941040039, + "learning_rate": 5e-06, + "loss": 0.6885, + "num_input_tokens_seen": 110989460, + "step": 1771 + }, + { + "epoch": 5.89351081530782, + "loss": 0.5637482404708862, + "loss_ce": 0.0002716918243095279, + "loss_iou": 0.1845703125, + "loss_num": 0.03857421875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 110989460, + "step": 1771 + }, + { + "epoch": 5.896838602329451, + "grad_norm": 9.636116981506348, + "learning_rate": 5e-06, + "loss": 0.444, + "num_input_tokens_seen": 111052032, + "step": 1772 + }, + { + "epoch": 5.896838602329451, + "loss": 0.4734896123409271, + "loss_ce": 0.0005892139161005616, + "loss_iou": 0.173828125, + "loss_num": 0.025146484375, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 111052032, + "step": 1772 + }, + { + "epoch": 5.900166389351082, + "grad_norm": 18.86623764038086, + "learning_rate": 5e-06, + "loss": 0.7722, + "num_input_tokens_seen": 111115768, + "step": 1773 + }, + { + "epoch": 5.900166389351082, + "loss": 0.7136220932006836, + "loss_ce": 0.0007315138936974108, + "loss_iou": 0.263671875, + "loss_num": 0.036865234375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 111115768, + "step": 1773 + }, + { + "epoch": 5.903494176372712, + "grad_norm": 13.020090103149414, + "learning_rate": 5e-06, + "loss": 0.6273, + "num_input_tokens_seen": 111179252, + "step": 1774 + }, + { + "epoch": 5.903494176372712, + "loss": 0.5287013053894043, + "loss_ce": 1.4801251381868497e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0390625, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 111179252, + "step": 1774 + }, + { + "epoch": 5.906821963394343, + "grad_norm": 23.916812896728516, + "learning_rate": 5e-06, + "loss": 0.5319, + "num_input_tokens_seen": 111241816, + "step": 1775 + }, + { + "epoch": 5.906821963394343, + "loss": 0.443467378616333, + "loss_ce": 0.0006573410355485976, + "loss_iou": 0.1455078125, + "loss_num": 0.030517578125, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 111241816, + "step": 1775 + }, + { + "epoch": 5.9101497504159735, + "grad_norm": 19.583410263061523, + "learning_rate": 5e-06, + "loss": 0.5933, + "num_input_tokens_seen": 111304188, + "step": 1776 + }, + { + "epoch": 5.9101497504159735, + "loss": 0.5054777264595032, + "loss_ce": 0.00010664336150512099, + "loss_iou": 0.154296875, + "loss_num": 0.0390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 111304188, + "step": 1776 + }, + { + "epoch": 5.913477537437604, + "grad_norm": 13.995909690856934, + "learning_rate": 5e-06, + "loss": 0.7816, + "num_input_tokens_seen": 111364944, + "step": 1777 + }, + { + "epoch": 5.913477537437604, + "loss": 1.0646007061004639, + "loss_ce": 0.0003917526628356427, + "loss_iou": 0.361328125, + "loss_num": 0.06884765625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 111364944, + "step": 1777 + }, + { + "epoch": 5.916805324459235, + "grad_norm": 9.358750343322754, + "learning_rate": 5e-06, + "loss": 0.5087, + "num_input_tokens_seen": 111427100, + "step": 1778 + }, + { + "epoch": 5.916805324459235, + "loss": 0.7140218615531921, + "loss_ce": 0.0008870940655469894, + "loss_iou": 0.2734375, + "loss_num": 0.033447265625, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 111427100, + "step": 1778 + }, + { + "epoch": 5.920133111480865, + "grad_norm": 13.724045753479004, + "learning_rate": 5e-06, + "loss": 0.7266, + "num_input_tokens_seen": 111490448, + "step": 1779 + }, + { + "epoch": 5.920133111480865, + "loss": 0.6259258985519409, + "loss_ce": 1.0390387615188956e-05, + "loss_iou": 0.212890625, + "loss_num": 0.040283203125, + "loss_xval": 0.625, + "num_input_tokens_seen": 111490448, + "step": 1779 + }, + { + "epoch": 5.923460898502496, + "grad_norm": 10.216179847717285, + "learning_rate": 5e-06, + "loss": 0.6562, + "num_input_tokens_seen": 111554604, + "step": 1780 + }, + { + "epoch": 5.923460898502496, + "loss": 0.7411832809448242, + "loss_ce": 0.00046060560271143913, + "loss_iou": 0.30859375, + "loss_num": 0.024658203125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 111554604, + "step": 1780 + }, + { + "epoch": 5.9267886855241265, + "grad_norm": 17.054357528686523, + "learning_rate": 5e-06, + "loss": 0.7283, + "num_input_tokens_seen": 111617796, + "step": 1781 + }, + { + "epoch": 5.9267886855241265, + "loss": 0.9874345064163208, + "loss_ce": 0.0007401675684377551, + "loss_iou": 0.380859375, + "loss_num": 0.044677734375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 111617796, + "step": 1781 + }, + { + "epoch": 5.930116472545757, + "grad_norm": 14.132950782775879, + "learning_rate": 5e-06, + "loss": 0.8466, + "num_input_tokens_seen": 111679904, + "step": 1782 + }, + { + "epoch": 5.930116472545757, + "loss": 0.9425029754638672, + "loss_ce": 0.0010967559646815062, + "loss_iou": 0.35546875, + "loss_num": 0.046630859375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 111679904, + "step": 1782 + }, + { + "epoch": 5.933444259567388, + "grad_norm": 18.246789932250977, + "learning_rate": 5e-06, + "loss": 0.7549, + "num_input_tokens_seen": 111743964, + "step": 1783 + }, + { + "epoch": 5.933444259567388, + "loss": 0.9670125842094421, + "loss_ce": 0.00021572083642240614, + "loss_iou": 0.31640625, + "loss_num": 0.06640625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 111743964, + "step": 1783 + }, + { + "epoch": 5.936772046589018, + "grad_norm": 18.004018783569336, + "learning_rate": 5e-06, + "loss": 0.7019, + "num_input_tokens_seen": 111805552, + "step": 1784 + }, + { + "epoch": 5.936772046589018, + "loss": 0.8806197643280029, + "loss_ce": 4.619309947884176e-06, + "loss_iou": 0.322265625, + "loss_num": 0.047119140625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 111805552, + "step": 1784 + }, + { + "epoch": 5.940099833610649, + "grad_norm": 10.934527397155762, + "learning_rate": 5e-06, + "loss": 0.5895, + "num_input_tokens_seen": 111869060, + "step": 1785 + }, + { + "epoch": 5.940099833610649, + "loss": 0.5993823409080505, + "loss_ce": 1.712212906568311e-05, + "loss_iou": 0.2197265625, + "loss_num": 0.031982421875, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 111869060, + "step": 1785 + }, + { + "epoch": 5.94342762063228, + "grad_norm": 11.646177291870117, + "learning_rate": 5e-06, + "loss": 0.7022, + "num_input_tokens_seen": 111932476, + "step": 1786 + }, + { + "epoch": 5.94342762063228, + "loss": 0.7437913417816162, + "loss_ce": 1.6888494428712875e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 111932476, + "step": 1786 + }, + { + "epoch": 5.94675540765391, + "grad_norm": 9.77434253692627, + "learning_rate": 5e-06, + "loss": 0.3784, + "num_input_tokens_seen": 111995188, + "step": 1787 + }, + { + "epoch": 5.94675540765391, + "loss": 0.3339497745037079, + "loss_ce": 0.00014850017032586038, + "loss_iou": 0.1240234375, + "loss_num": 0.0172119140625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 111995188, + "step": 1787 + }, + { + "epoch": 5.950083194675541, + "grad_norm": 9.4547700881958, + "learning_rate": 5e-06, + "loss": 0.6886, + "num_input_tokens_seen": 112059688, + "step": 1788 + }, + { + "epoch": 5.950083194675541, + "loss": 0.7533557415008545, + "loss_ce": 0.0010364109184592962, + "loss_iou": 0.251953125, + "loss_num": 0.0498046875, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 112059688, + "step": 1788 + }, + { + "epoch": 5.953410981697171, + "grad_norm": 10.41918659210205, + "learning_rate": 5e-06, + "loss": 0.4527, + "num_input_tokens_seen": 112122556, + "step": 1789 + }, + { + "epoch": 5.953410981697171, + "loss": 0.4796237349510193, + "loss_ce": 9.511190000921488e-06, + "loss_iou": 0.1328125, + "loss_num": 0.042724609375, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 112122556, + "step": 1789 + }, + { + "epoch": 5.956738768718802, + "grad_norm": 7.984902381896973, + "learning_rate": 5e-06, + "loss": 0.6002, + "num_input_tokens_seen": 112184820, + "step": 1790 + }, + { + "epoch": 5.956738768718802, + "loss": 0.5412213206291199, + "loss_ce": 0.0007550385198555887, + "loss_iou": 0.1865234375, + "loss_num": 0.033447265625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 112184820, + "step": 1790 + }, + { + "epoch": 5.960066555740433, + "grad_norm": 10.358345031738281, + "learning_rate": 5e-06, + "loss": 0.6859, + "num_input_tokens_seen": 112246916, + "step": 1791 + }, + { + "epoch": 5.960066555740433, + "loss": 0.5734908580780029, + "loss_ce": 4.511678980634315e-06, + "loss_iou": 0.1396484375, + "loss_num": 0.058837890625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 112246916, + "step": 1791 + }, + { + "epoch": 5.963394342762063, + "grad_norm": 9.835028648376465, + "learning_rate": 5e-06, + "loss": 0.7321, + "num_input_tokens_seen": 112309796, + "step": 1792 + }, + { + "epoch": 5.963394342762063, + "loss": 0.707068681716919, + "loss_ce": 3.736479629878886e-05, + "loss_iou": 0.2265625, + "loss_num": 0.05078125, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 112309796, + "step": 1792 + }, + { + "epoch": 5.966722129783694, + "grad_norm": 8.542290687561035, + "learning_rate": 5e-06, + "loss": 0.771, + "num_input_tokens_seen": 112372752, + "step": 1793 + }, + { + "epoch": 5.966722129783694, + "loss": 0.784670352935791, + "loss_ce": 0.00036859206738881767, + "loss_iou": 0.28125, + "loss_num": 0.044189453125, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 112372752, + "step": 1793 + }, + { + "epoch": 5.9700499168053245, + "grad_norm": 10.128270149230957, + "learning_rate": 5e-06, + "loss": 0.6849, + "num_input_tokens_seen": 112435784, + "step": 1794 + }, + { + "epoch": 5.9700499168053245, + "loss": 0.7285070419311523, + "loss_ce": 5.2509160013869405e-05, + "loss_iou": 0.2353515625, + "loss_num": 0.0517578125, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 112435784, + "step": 1794 + }, + { + "epoch": 5.973377703826955, + "grad_norm": 21.481040954589844, + "learning_rate": 5e-06, + "loss": 0.7537, + "num_input_tokens_seen": 112499088, + "step": 1795 + }, + { + "epoch": 5.973377703826955, + "loss": 0.4841929078102112, + "loss_ce": 0.000794428342487663, + "loss_iou": 0.158203125, + "loss_num": 0.033447265625, + "loss_xval": 0.484375, + "num_input_tokens_seen": 112499088, + "step": 1795 + }, + { + "epoch": 5.976705490848586, + "grad_norm": 17.77248764038086, + "learning_rate": 5e-06, + "loss": 0.5665, + "num_input_tokens_seen": 112562248, + "step": 1796 + }, + { + "epoch": 5.976705490848586, + "loss": 0.34294500946998596, + "loss_ce": 0.0002020865649683401, + "loss_iou": 0.1142578125, + "loss_num": 0.0228271484375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 112562248, + "step": 1796 + }, + { + "epoch": 5.980033277870216, + "grad_norm": 9.828653335571289, + "learning_rate": 5e-06, + "loss": 0.615, + "num_input_tokens_seen": 112624292, + "step": 1797 + }, + { + "epoch": 5.980033277870216, + "loss": 0.8139055371284485, + "loss_ce": 0.0006731028552167118, + "loss_iou": 0.27734375, + "loss_num": 0.052001953125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 112624292, + "step": 1797 + }, + { + "epoch": 5.983361064891847, + "grad_norm": 10.219780921936035, + "learning_rate": 5e-06, + "loss": 0.5873, + "num_input_tokens_seen": 112687100, + "step": 1798 + }, + { + "epoch": 5.983361064891847, + "loss": 0.6688265204429626, + "loss_ce": 3.2744080726843094e-06, + "loss_iou": 0.193359375, + "loss_num": 0.056640625, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 112687100, + "step": 1798 + }, + { + "epoch": 5.9866888519134775, + "grad_norm": 17.44321632385254, + "learning_rate": 5e-06, + "loss": 0.6239, + "num_input_tokens_seen": 112748904, + "step": 1799 + }, + { + "epoch": 5.9866888519134775, + "loss": 0.7041700482368469, + "loss_ce": 0.0011061305413022637, + "loss_iou": 0.29296875, + "loss_num": 0.02294921875, + "loss_xval": 0.703125, + "num_input_tokens_seen": 112748904, + "step": 1799 + }, + { + "epoch": 5.990016638935108, + "grad_norm": 20.630294799804688, + "learning_rate": 5e-06, + "loss": 0.7101, + "num_input_tokens_seen": 112810924, + "step": 1800 + }, + { + "epoch": 5.990016638935108, + "loss": 0.6245334148406982, + "loss_ce": 0.00014378642663359642, + "loss_iou": 0.22265625, + "loss_num": 0.035888671875, + "loss_xval": 0.625, + "num_input_tokens_seen": 112810924, + "step": 1800 + }, + { + "epoch": 5.993344425956739, + "grad_norm": 17.083229064941406, + "learning_rate": 5e-06, + "loss": 0.686, + "num_input_tokens_seen": 112875104, + "step": 1801 + }, + { + "epoch": 5.993344425956739, + "loss": 0.7393364310264587, + "loss_ce": 0.0003227871493436396, + "loss_iou": 0.275390625, + "loss_num": 0.03759765625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 112875104, + "step": 1801 + }, + { + "epoch": 5.996672212978369, + "grad_norm": 5.339547157287598, + "learning_rate": 5e-06, + "loss": 0.4334, + "num_input_tokens_seen": 112937884, + "step": 1802 + }, + { + "epoch": 5.996672212978369, + "loss": 0.4524919390678406, + "loss_ce": 0.00022140935470815748, + "loss_iou": 0.15234375, + "loss_num": 0.0296630859375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 112937884, + "step": 1802 + }, + { + "epoch": 6.0, + "grad_norm": 12.052851676940918, + "learning_rate": 5e-06, + "loss": 0.5541, + "num_input_tokens_seen": 113000836, + "step": 1803 + }, + { + "epoch": 6.0, + "loss": 0.6314157247543335, + "loss_ce": 0.00012909869838040322, + "loss_iou": 0.212890625, + "loss_num": 0.041015625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 113000836, + "step": 1803 + }, + { + "epoch": 6.003327787021631, + "grad_norm": 17.944114685058594, + "learning_rate": 5e-06, + "loss": 0.6348, + "num_input_tokens_seen": 113064196, + "step": 1804 + }, + { + "epoch": 6.003327787021631, + "loss": 0.6592069864273071, + "loss_ce": 2.7252650397713296e-05, + "loss_iou": 0.24609375, + "loss_num": 0.033203125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 113064196, + "step": 1804 + }, + { + "epoch": 6.006655574043261, + "grad_norm": 8.979909896850586, + "learning_rate": 5e-06, + "loss": 0.4955, + "num_input_tokens_seen": 113125704, + "step": 1805 + }, + { + "epoch": 6.006655574043261, + "loss": 0.5311020612716675, + "loss_ce": 0.0003403459268156439, + "loss_iou": 0.1611328125, + "loss_num": 0.04150390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 113125704, + "step": 1805 + }, + { + "epoch": 6.009983361064892, + "grad_norm": 23.03765869140625, + "learning_rate": 5e-06, + "loss": 0.5458, + "num_input_tokens_seen": 113189092, + "step": 1806 + }, + { + "epoch": 6.009983361064892, + "loss": 0.6970461010932922, + "loss_ce": 0.00026878988137468696, + "loss_iou": 0.2265625, + "loss_num": 0.048828125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 113189092, + "step": 1806 + }, + { + "epoch": 6.0133111480865225, + "grad_norm": 40.743404388427734, + "learning_rate": 5e-06, + "loss": 0.7297, + "num_input_tokens_seen": 113252312, + "step": 1807 + }, + { + "epoch": 6.0133111480865225, + "loss": 0.9480460286140442, + "loss_ce": 0.0007804275956004858, + "loss_iou": 0.345703125, + "loss_num": 0.05126953125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 113252312, + "step": 1807 + }, + { + "epoch": 6.016638935108153, + "grad_norm": 24.678939819335938, + "learning_rate": 5e-06, + "loss": 0.4995, + "num_input_tokens_seen": 113315144, + "step": 1808 + }, + { + "epoch": 6.016638935108153, + "loss": 0.6151464581489563, + "loss_ce": 0.00015620810154359788, + "loss_iou": 0.255859375, + "loss_num": 0.020751953125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 113315144, + "step": 1808 + }, + { + "epoch": 6.019966722129784, + "grad_norm": 8.93970775604248, + "learning_rate": 5e-06, + "loss": 0.6358, + "num_input_tokens_seen": 113378196, + "step": 1809 + }, + { + "epoch": 6.019966722129784, + "loss": 0.4480162262916565, + "loss_ce": 0.0001402330381097272, + "loss_iou": 0.169921875, + "loss_num": 0.0216064453125, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 113378196, + "step": 1809 + }, + { + "epoch": 6.023294509151414, + "grad_norm": 6.139615058898926, + "learning_rate": 5e-06, + "loss": 0.3159, + "num_input_tokens_seen": 113438784, + "step": 1810 + }, + { + "epoch": 6.023294509151414, + "loss": 0.3206220269203186, + "loss_ce": 4.330090632720385e-06, + "loss_iou": 0.07421875, + "loss_num": 0.034423828125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 113438784, + "step": 1810 + }, + { + "epoch": 6.026622296173045, + "grad_norm": 25.28673553466797, + "learning_rate": 5e-06, + "loss": 0.5646, + "num_input_tokens_seen": 113501584, + "step": 1811 + }, + { + "epoch": 6.026622296173045, + "loss": 0.47726649045944214, + "loss_ce": 0.0009481181623414159, + "loss_iou": 0.1396484375, + "loss_num": 0.03955078125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 113501584, + "step": 1811 + }, + { + "epoch": 6.0299500831946755, + "grad_norm": 23.174034118652344, + "learning_rate": 5e-06, + "loss": 0.6892, + "num_input_tokens_seen": 113564772, + "step": 1812 + }, + { + "epoch": 6.0299500831946755, + "loss": 0.638823390007019, + "loss_ce": 0.0003956563596148044, + "loss_iou": 0.2197265625, + "loss_num": 0.039794921875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 113564772, + "step": 1812 + }, + { + "epoch": 6.033277870216306, + "grad_norm": 9.570660591125488, + "learning_rate": 5e-06, + "loss": 0.482, + "num_input_tokens_seen": 113627164, + "step": 1813 + }, + { + "epoch": 6.033277870216306, + "loss": 0.5443622469902039, + "loss_ce": 0.00017276505241170526, + "loss_iou": 0.2158203125, + "loss_num": 0.0224609375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 113627164, + "step": 1813 + }, + { + "epoch": 6.036605657237937, + "grad_norm": 15.14444351196289, + "learning_rate": 5e-06, + "loss": 0.567, + "num_input_tokens_seen": 113690112, + "step": 1814 + }, + { + "epoch": 6.036605657237937, + "loss": 0.4782255291938782, + "loss_ce": 1.5094023183337413e-05, + "loss_iou": 0.1875, + "loss_num": 0.0206298828125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 113690112, + "step": 1814 + }, + { + "epoch": 6.039933444259567, + "grad_norm": 13.660589218139648, + "learning_rate": 5e-06, + "loss": 0.617, + "num_input_tokens_seen": 113753248, + "step": 1815 + }, + { + "epoch": 6.039933444259567, + "loss": 0.7184145450592041, + "loss_ce": 3.077809378737584e-05, + "loss_iou": 0.265625, + "loss_num": 0.037353515625, + "loss_xval": 0.71875, + "num_input_tokens_seen": 113753248, + "step": 1815 + }, + { + "epoch": 6.043261231281198, + "grad_norm": 9.653757095336914, + "learning_rate": 5e-06, + "loss": 0.9097, + "num_input_tokens_seen": 113817632, + "step": 1816 + }, + { + "epoch": 6.043261231281198, + "loss": 1.0042712688446045, + "loss_ce": 0.0004870828415732831, + "loss_iou": 0.33984375, + "loss_num": 0.064453125, + "loss_xval": 1.0, + "num_input_tokens_seen": 113817632, + "step": 1816 + }, + { + "epoch": 6.046589018302829, + "grad_norm": 10.754157066345215, + "learning_rate": 5e-06, + "loss": 0.791, + "num_input_tokens_seen": 113880676, + "step": 1817 + }, + { + "epoch": 6.046589018302829, + "loss": 0.6697897911071777, + "loss_ce": 0.00011203553731320426, + "loss_iou": 0.25390625, + "loss_num": 0.03271484375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 113880676, + "step": 1817 + }, + { + "epoch": 6.049916805324459, + "grad_norm": 12.63485336303711, + "learning_rate": 5e-06, + "loss": 0.5059, + "num_input_tokens_seen": 113941992, + "step": 1818 + }, + { + "epoch": 6.049916805324459, + "loss": 0.4937788248062134, + "loss_ce": 4.377495315566193e-06, + "loss_iou": 0.13671875, + "loss_num": 0.044189453125, + "loss_xval": 0.494140625, + "num_input_tokens_seen": 113941992, + "step": 1818 + }, + { + "epoch": 6.05324459234609, + "grad_norm": 6.852193832397461, + "learning_rate": 5e-06, + "loss": 0.5946, + "num_input_tokens_seen": 114003684, + "step": 1819 + }, + { + "epoch": 6.05324459234609, + "loss": 0.6906334161758423, + "loss_ce": 0.0008140857680700719, + "loss_iou": 0.259765625, + "loss_num": 0.033935546875, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 114003684, + "step": 1819 + }, + { + "epoch": 6.05657237936772, + "grad_norm": 14.351150512695312, + "learning_rate": 5e-06, + "loss": 0.7158, + "num_input_tokens_seen": 114065336, + "step": 1820 + }, + { + "epoch": 6.05657237936772, + "loss": 0.7381648421287537, + "loss_ce": 5.69224130231305e-06, + "loss_iou": 0.2421875, + "loss_num": 0.051025390625, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 114065336, + "step": 1820 + }, + { + "epoch": 6.059900166389351, + "grad_norm": 30.204614639282227, + "learning_rate": 5e-06, + "loss": 0.6653, + "num_input_tokens_seen": 114127604, + "step": 1821 + }, + { + "epoch": 6.059900166389351, + "loss": 0.6157270073890686, + "loss_ce": 0.0010419311001896858, + "loss_iou": 0.2265625, + "loss_num": 0.0322265625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 114127604, + "step": 1821 + }, + { + "epoch": 6.063227953410982, + "grad_norm": 9.658365249633789, + "learning_rate": 5e-06, + "loss": 0.5936, + "num_input_tokens_seen": 114190672, + "step": 1822 + }, + { + "epoch": 6.063227953410982, + "loss": 0.6472856402397156, + "loss_ce": 0.0008012944017536938, + "loss_iou": 0.23046875, + "loss_num": 0.037109375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 114190672, + "step": 1822 + }, + { + "epoch": 6.066555740432612, + "grad_norm": 18.5205135345459, + "learning_rate": 5e-06, + "loss": 0.6124, + "num_input_tokens_seen": 114253368, + "step": 1823 + }, + { + "epoch": 6.066555740432612, + "loss": 0.735488772392273, + "loss_ce": 0.0006254613981582224, + "loss_iou": 0.2314453125, + "loss_num": 0.054443359375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 114253368, + "step": 1823 + }, + { + "epoch": 6.069883527454243, + "grad_norm": 9.273344993591309, + "learning_rate": 5e-06, + "loss": 0.8266, + "num_input_tokens_seen": 114314064, + "step": 1824 + }, + { + "epoch": 6.069883527454243, + "loss": 0.7096292972564697, + "loss_ce": 0.0001566863793414086, + "loss_iou": 0.263671875, + "loss_num": 0.0361328125, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 114314064, + "step": 1824 + }, + { + "epoch": 6.0732113144758735, + "grad_norm": 10.49995231628418, + "learning_rate": 5e-06, + "loss": 0.4339, + "num_input_tokens_seen": 114374024, + "step": 1825 + }, + { + "epoch": 6.0732113144758735, + "loss": 0.4444683790206909, + "loss_ce": 1.035763489198871e-05, + "loss_iou": 0.10302734375, + "loss_num": 0.047607421875, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 114374024, + "step": 1825 + }, + { + "epoch": 6.076539101497504, + "grad_norm": 30.338027954101562, + "learning_rate": 5e-06, + "loss": 0.6147, + "num_input_tokens_seen": 114437368, + "step": 1826 + }, + { + "epoch": 6.076539101497504, + "loss": 0.7704422473907471, + "loss_ce": 0.0011551063507795334, + "loss_iou": 0.287109375, + "loss_num": 0.038818359375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 114437368, + "step": 1826 + }, + { + "epoch": 6.079866888519135, + "grad_norm": 21.672454833984375, + "learning_rate": 5e-06, + "loss": 0.69, + "num_input_tokens_seen": 114500248, + "step": 1827 + }, + { + "epoch": 6.079866888519135, + "loss": 0.42282021045684814, + "loss_ce": 0.0004569635493680835, + "loss_iou": 0.140625, + "loss_num": 0.0283203125, + "loss_xval": 0.421875, + "num_input_tokens_seen": 114500248, + "step": 1827 + }, + { + "epoch": 6.083194675540765, + "grad_norm": 12.467771530151367, + "learning_rate": 5e-06, + "loss": 0.5934, + "num_input_tokens_seen": 114562540, + "step": 1828 + }, + { + "epoch": 6.083194675540765, + "loss": 0.4341202676296234, + "loss_ce": 0.0006791083724237978, + "loss_iou": 0.134765625, + "loss_num": 0.03271484375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 114562540, + "step": 1828 + }, + { + "epoch": 6.086522462562396, + "grad_norm": 11.479426383972168, + "learning_rate": 5e-06, + "loss": 0.6028, + "num_input_tokens_seen": 114624740, + "step": 1829 + }, + { + "epoch": 6.086522462562396, + "loss": 0.5540913343429565, + "loss_ce": 1.4216830095392652e-05, + "loss_iou": 0.212890625, + "loss_num": 0.025634765625, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 114624740, + "step": 1829 + }, + { + "epoch": 6.0898502495840265, + "grad_norm": 14.962776184082031, + "learning_rate": 5e-06, + "loss": 0.7424, + "num_input_tokens_seen": 114689968, + "step": 1830 + }, + { + "epoch": 6.0898502495840265, + "loss": 0.5499890446662903, + "loss_ce": 0.000794704130385071, + "loss_iou": 0.189453125, + "loss_num": 0.033935546875, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 114689968, + "step": 1830 + }, + { + "epoch": 6.093178036605657, + "grad_norm": 46.97150421142578, + "learning_rate": 5e-06, + "loss": 0.8063, + "num_input_tokens_seen": 114755060, + "step": 1831 + }, + { + "epoch": 6.093178036605657, + "loss": 0.8358315825462341, + "loss_ce": 0.0001382491027470678, + "loss_iou": 0.27734375, + "loss_num": 0.056640625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 114755060, + "step": 1831 + }, + { + "epoch": 6.096505823627288, + "grad_norm": 24.191722869873047, + "learning_rate": 5e-06, + "loss": 0.5722, + "num_input_tokens_seen": 114817492, + "step": 1832 + }, + { + "epoch": 6.096505823627288, + "loss": 0.5860408544540405, + "loss_ce": 0.00022541148064192384, + "loss_iou": 0.216796875, + "loss_num": 0.03076171875, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 114817492, + "step": 1832 + }, + { + "epoch": 6.099833610648918, + "grad_norm": 12.793700218200684, + "learning_rate": 5e-06, + "loss": 0.7779, + "num_input_tokens_seen": 114881384, + "step": 1833 + }, + { + "epoch": 6.099833610648918, + "loss": 0.7877440452575684, + "loss_ce": 0.0002685006766114384, + "loss_iou": 0.279296875, + "loss_num": 0.045654296875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 114881384, + "step": 1833 + }, + { + "epoch": 6.103161397670549, + "grad_norm": 11.71887493133545, + "learning_rate": 5e-06, + "loss": 0.581, + "num_input_tokens_seen": 114943416, + "step": 1834 + }, + { + "epoch": 6.103161397670549, + "loss": 0.3188535273075104, + "loss_ce": 5.8923419601342175e-06, + "loss_iou": 0.0625, + "loss_num": 0.038818359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 114943416, + "step": 1834 + }, + { + "epoch": 6.10648918469218, + "grad_norm": 8.89264965057373, + "learning_rate": 5e-06, + "loss": 0.5685, + "num_input_tokens_seen": 115005544, + "step": 1835 + }, + { + "epoch": 6.10648918469218, + "loss": 0.5760512351989746, + "loss_ce": 1.4534259662468685e-06, + "loss_iou": 0.1689453125, + "loss_num": 0.047607421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 115005544, + "step": 1835 + }, + { + "epoch": 6.10981697171381, + "grad_norm": 23.207515716552734, + "learning_rate": 5e-06, + "loss": 0.6417, + "num_input_tokens_seen": 115066228, + "step": 1836 + }, + { + "epoch": 6.10981697171381, + "loss": 0.5505459308624268, + "loss_ce": 0.001595688983798027, + "loss_iou": 0.2216796875, + "loss_num": 0.02099609375, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 115066228, + "step": 1836 + }, + { + "epoch": 6.113144758735441, + "grad_norm": 44.30556106567383, + "learning_rate": 5e-06, + "loss": 0.8094, + "num_input_tokens_seen": 115130620, + "step": 1837 + }, + { + "epoch": 6.113144758735441, + "loss": 0.7348802089691162, + "loss_ce": 0.0002610796072985977, + "loss_iou": 0.271484375, + "loss_num": 0.0380859375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 115130620, + "step": 1837 + }, + { + "epoch": 6.116472545757071, + "grad_norm": 24.386632919311523, + "learning_rate": 5e-06, + "loss": 0.6976, + "num_input_tokens_seen": 115192732, + "step": 1838 + }, + { + "epoch": 6.116472545757071, + "loss": 0.7894414663314819, + "loss_ce": 0.0007451603887602687, + "loss_iou": 0.283203125, + "loss_num": 0.044677734375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 115192732, + "step": 1838 + }, + { + "epoch": 6.119800332778702, + "grad_norm": 13.562687873840332, + "learning_rate": 5e-06, + "loss": 0.6654, + "num_input_tokens_seen": 115257288, + "step": 1839 + }, + { + "epoch": 6.119800332778702, + "loss": 0.803615152835846, + "loss_ce": 0.0008807668928056955, + "loss_iou": 0.33984375, + "loss_num": 0.024169921875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 115257288, + "step": 1839 + }, + { + "epoch": 6.123128119800333, + "grad_norm": 15.4951753616333, + "learning_rate": 5e-06, + "loss": 0.7436, + "num_input_tokens_seen": 115321772, + "step": 1840 + }, + { + "epoch": 6.123128119800333, + "loss": 0.7024877071380615, + "loss_ce": 0.0003393086954019964, + "loss_iou": 0.26953125, + "loss_num": 0.032958984375, + "loss_xval": 0.703125, + "num_input_tokens_seen": 115321772, + "step": 1840 + }, + { + "epoch": 6.126455906821963, + "grad_norm": 16.896167755126953, + "learning_rate": 5e-06, + "loss": 0.5496, + "num_input_tokens_seen": 115383640, + "step": 1841 + }, + { + "epoch": 6.126455906821963, + "loss": 0.6101754903793335, + "loss_ce": 0.0001900848001241684, + "loss_iou": 0.1611328125, + "loss_num": 0.057373046875, + "loss_xval": 0.609375, + "num_input_tokens_seen": 115383640, + "step": 1841 + }, + { + "epoch": 6.129783693843594, + "grad_norm": 25.171308517456055, + "learning_rate": 5e-06, + "loss": 0.5562, + "num_input_tokens_seen": 115445032, + "step": 1842 + }, + { + "epoch": 6.129783693843594, + "loss": 0.6438261270523071, + "loss_ce": 0.0011869219597429037, + "loss_iou": 0.185546875, + "loss_num": 0.05419921875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 115445032, + "step": 1842 + }, + { + "epoch": 6.1331114808652245, + "grad_norm": 17.913129806518555, + "learning_rate": 5e-06, + "loss": 0.3981, + "num_input_tokens_seen": 115506208, + "step": 1843 + }, + { + "epoch": 6.1331114808652245, + "loss": 0.3706634044647217, + "loss_ce": 5.7946737797465175e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.0174560546875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 115506208, + "step": 1843 + }, + { + "epoch": 6.136439267886855, + "grad_norm": 11.869161605834961, + "learning_rate": 5e-06, + "loss": 0.5245, + "num_input_tokens_seen": 115568952, + "step": 1844 + }, + { + "epoch": 6.136439267886855, + "loss": 0.6334899663925171, + "loss_ce": 0.0007995049236342311, + "loss_iou": 0.21484375, + "loss_num": 0.040283203125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 115568952, + "step": 1844 + }, + { + "epoch": 6.139767054908486, + "grad_norm": 19.262910842895508, + "learning_rate": 5e-06, + "loss": 0.6218, + "num_input_tokens_seen": 115631376, + "step": 1845 + }, + { + "epoch": 6.139767054908486, + "loss": 0.6551563143730164, + "loss_ce": 4.9998088798020035e-06, + "loss_iou": 0.2314453125, + "loss_num": 0.038818359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 115631376, + "step": 1845 + }, + { + "epoch": 6.143094841930116, + "grad_norm": 16.876794815063477, + "learning_rate": 5e-06, + "loss": 0.5707, + "num_input_tokens_seen": 115694676, + "step": 1846 + }, + { + "epoch": 6.143094841930116, + "loss": 0.5590164661407471, + "loss_ce": 0.002131724264472723, + "loss_iou": 0.1826171875, + "loss_num": 0.038330078125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 115694676, + "step": 1846 + }, + { + "epoch": 6.146422628951747, + "grad_norm": 13.508707046508789, + "learning_rate": 5e-06, + "loss": 0.4522, + "num_input_tokens_seen": 115757384, + "step": 1847 + }, + { + "epoch": 6.146422628951747, + "loss": 0.39521026611328125, + "loss_ce": 0.00019072243594564497, + "loss_iou": 0.119140625, + "loss_num": 0.031494140625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 115757384, + "step": 1847 + }, + { + "epoch": 6.149750415973378, + "grad_norm": 24.865703582763672, + "learning_rate": 5e-06, + "loss": 0.6482, + "num_input_tokens_seen": 115820368, + "step": 1848 + }, + { + "epoch": 6.149750415973378, + "loss": 0.8225171566009521, + "loss_ce": 7.408522833429743e-06, + "loss_iou": 0.28515625, + "loss_num": 0.05078125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 115820368, + "step": 1848 + }, + { + "epoch": 6.153078202995008, + "grad_norm": 10.617776870727539, + "learning_rate": 5e-06, + "loss": 0.7869, + "num_input_tokens_seen": 115883888, + "step": 1849 + }, + { + "epoch": 6.153078202995008, + "loss": 0.7970017194747925, + "loss_ce": 4.6364102672669105e-06, + "loss_iou": 0.28125, + "loss_num": 0.047119140625, + "loss_xval": 0.796875, + "num_input_tokens_seen": 115883888, + "step": 1849 + }, + { + "epoch": 6.156405990016639, + "grad_norm": 13.21096134185791, + "learning_rate": 5e-06, + "loss": 0.6439, + "num_input_tokens_seen": 115947024, + "step": 1850 + }, + { + "epoch": 6.156405990016639, + "loss": 0.5439035892486572, + "loss_ce": 0.0003855030226986855, + "loss_iou": 0.1728515625, + "loss_num": 0.039306640625, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 115947024, + "step": 1850 + }, + { + "epoch": 6.159733777038269, + "grad_norm": 12.673142433166504, + "learning_rate": 5e-06, + "loss": 0.591, + "num_input_tokens_seen": 116008540, + "step": 1851 + }, + { + "epoch": 6.159733777038269, + "loss": 0.5002042651176453, + "loss_ce": 0.0006009893259033561, + "loss_iou": 0.13671875, + "loss_num": 0.045166015625, + "loss_xval": 0.5, + "num_input_tokens_seen": 116008540, + "step": 1851 + }, + { + "epoch": 6.1630615640599, + "grad_norm": 12.410579681396484, + "learning_rate": 5e-06, + "loss": 0.5276, + "num_input_tokens_seen": 116069856, + "step": 1852 + }, + { + "epoch": 6.1630615640599, + "loss": 0.6053777933120728, + "loss_ce": 0.00015313336916733533, + "loss_iou": 0.203125, + "loss_num": 0.03955078125, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 116069856, + "step": 1852 + }, + { + "epoch": 6.166389351081531, + "grad_norm": 10.24417781829834, + "learning_rate": 5e-06, + "loss": 0.6968, + "num_input_tokens_seen": 116132820, + "step": 1853 + }, + { + "epoch": 6.166389351081531, + "loss": 0.45372575521469116, + "loss_ce": 0.0004481864161789417, + "loss_iou": 0.1640625, + "loss_num": 0.02490234375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 116132820, + "step": 1853 + }, + { + "epoch": 6.169717138103161, + "grad_norm": 19.29709243774414, + "learning_rate": 5e-06, + "loss": 0.8242, + "num_input_tokens_seen": 116197008, + "step": 1854 + }, + { + "epoch": 6.169717138103161, + "loss": 0.7781339287757874, + "loss_ce": 0.0007902136421762407, + "loss_iou": 0.302734375, + "loss_num": 0.03466796875, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 116197008, + "step": 1854 + }, + { + "epoch": 6.173044925124792, + "grad_norm": 16.979761123657227, + "learning_rate": 5e-06, + "loss": 0.5969, + "num_input_tokens_seen": 116260052, + "step": 1855 + }, + { + "epoch": 6.173044925124792, + "loss": 0.5408188104629517, + "loss_ce": 4.733385139843449e-05, + "loss_iou": 0.1953125, + "loss_num": 0.0301513671875, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 116260052, + "step": 1855 + }, + { + "epoch": 6.1763727121464225, + "grad_norm": 25.549842834472656, + "learning_rate": 5e-06, + "loss": 0.8045, + "num_input_tokens_seen": 116322916, + "step": 1856 + }, + { + "epoch": 6.1763727121464225, + "loss": 0.9726593494415283, + "loss_ce": 0.0002472798223607242, + "loss_iou": 0.3671875, + "loss_num": 0.047607421875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 116322916, + "step": 1856 + }, + { + "epoch": 6.179700499168053, + "grad_norm": 11.069473266601562, + "learning_rate": 5e-06, + "loss": 0.6182, + "num_input_tokens_seen": 116386300, + "step": 1857 + }, + { + "epoch": 6.179700499168053, + "loss": 0.8058730363845825, + "loss_ce": 0.0003310354077257216, + "loss_iou": 0.28515625, + "loss_num": 0.046875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 116386300, + "step": 1857 + }, + { + "epoch": 6.183028286189684, + "grad_norm": 11.940938949584961, + "learning_rate": 5e-06, + "loss": 0.6844, + "num_input_tokens_seen": 116448856, + "step": 1858 + }, + { + "epoch": 6.183028286189684, + "loss": 0.8730907440185547, + "loss_ce": 0.0008984009618870914, + "loss_iou": 0.322265625, + "loss_num": 0.045654296875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 116448856, + "step": 1858 + }, + { + "epoch": 6.186356073211314, + "grad_norm": 26.180950164794922, + "learning_rate": 5e-06, + "loss": 0.6785, + "num_input_tokens_seen": 116513172, + "step": 1859 + }, + { + "epoch": 6.186356073211314, + "loss": 0.6941823959350586, + "loss_ce": 0.0003347777819726616, + "loss_iou": 0.298828125, + "loss_num": 0.01953125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 116513172, + "step": 1859 + }, + { + "epoch": 6.189683860232945, + "grad_norm": 38.279239654541016, + "learning_rate": 5e-06, + "loss": 0.5853, + "num_input_tokens_seen": 116575568, + "step": 1860 + }, + { + "epoch": 6.189683860232945, + "loss": 0.6565850973129272, + "loss_ce": 0.0003350722254253924, + "loss_iou": 0.220703125, + "loss_num": 0.043212890625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 116575568, + "step": 1860 + }, + { + "epoch": 6.1930116472545755, + "grad_norm": 23.869171142578125, + "learning_rate": 5e-06, + "loss": 0.5414, + "num_input_tokens_seen": 116638880, + "step": 1861 + }, + { + "epoch": 6.1930116472545755, + "loss": 0.48156753182411194, + "loss_ce": 0.0002443046832922846, + "loss_iou": 0.1630859375, + "loss_num": 0.03076171875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 116638880, + "step": 1861 + }, + { + "epoch": 6.196339434276206, + "grad_norm": 36.01377487182617, + "learning_rate": 5e-06, + "loss": 0.6001, + "num_input_tokens_seen": 116702460, + "step": 1862 + }, + { + "epoch": 6.196339434276206, + "loss": 0.5464353561401367, + "loss_ce": 0.0007810404058545828, + "loss_iou": 0.16796875, + "loss_num": 0.0419921875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 116702460, + "step": 1862 + }, + { + "epoch": 6.199667221297837, + "grad_norm": 21.87510108947754, + "learning_rate": 5e-06, + "loss": 0.6532, + "num_input_tokens_seen": 116765876, + "step": 1863 + }, + { + "epoch": 6.199667221297837, + "loss": 0.814652144908905, + "loss_ce": 0.00019903229258488864, + "loss_iou": 0.28515625, + "loss_num": 0.048583984375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 116765876, + "step": 1863 + }, + { + "epoch": 6.202995008319467, + "grad_norm": 7.908597946166992, + "learning_rate": 5e-06, + "loss": 0.5938, + "num_input_tokens_seen": 116828836, + "step": 1864 + }, + { + "epoch": 6.202995008319467, + "loss": 0.7178022265434265, + "loss_ce": 0.000639147125184536, + "loss_iou": 0.25390625, + "loss_num": 0.042236328125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 116828836, + "step": 1864 + }, + { + "epoch": 6.206322795341098, + "grad_norm": 11.291393280029297, + "learning_rate": 5e-06, + "loss": 0.5858, + "num_input_tokens_seen": 116890788, + "step": 1865 + }, + { + "epoch": 6.206322795341098, + "loss": 0.7302819490432739, + "loss_ce": 0.0004235214146319777, + "loss_iou": 0.2578125, + "loss_num": 0.04248046875, + "loss_xval": 0.73046875, + "num_input_tokens_seen": 116890788, + "step": 1865 + }, + { + "epoch": 6.209650582362729, + "grad_norm": 27.98455238342285, + "learning_rate": 5e-06, + "loss": 0.6189, + "num_input_tokens_seen": 116953012, + "step": 1866 + }, + { + "epoch": 6.209650582362729, + "loss": 0.7605868577957153, + "loss_ce": 8.884212729753926e-05, + "loss_iou": 0.275390625, + "loss_num": 0.041748046875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 116953012, + "step": 1866 + }, + { + "epoch": 6.212978369384359, + "grad_norm": 37.32789611816406, + "learning_rate": 5e-06, + "loss": 0.6325, + "num_input_tokens_seen": 117016728, + "step": 1867 + }, + { + "epoch": 6.212978369384359, + "loss": 0.5575933456420898, + "loss_ce": 0.0004644446889869869, + "loss_iou": 0.1875, + "loss_num": 0.036376953125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 117016728, + "step": 1867 + }, + { + "epoch": 6.21630615640599, + "grad_norm": 8.77429485321045, + "learning_rate": 5e-06, + "loss": 0.7978, + "num_input_tokens_seen": 117079776, + "step": 1868 + }, + { + "epoch": 6.21630615640599, + "loss": 0.5884289741516113, + "loss_ce": 0.0005383165553212166, + "loss_iou": 0.2197265625, + "loss_num": 0.029541015625, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 117079776, + "step": 1868 + }, + { + "epoch": 6.21963394342762, + "grad_norm": 13.149292945861816, + "learning_rate": 5e-06, + "loss": 0.4875, + "num_input_tokens_seen": 117142040, + "step": 1869 + }, + { + "epoch": 6.21963394342762, + "loss": 0.41048645973205566, + "loss_ce": 2.5050567273865454e-05, + "loss_iou": 0.140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 117142040, + "step": 1869 + }, + { + "epoch": 6.222961730449251, + "grad_norm": 22.748687744140625, + "learning_rate": 5e-06, + "loss": 0.6843, + "num_input_tokens_seen": 117204720, + "step": 1870 + }, + { + "epoch": 6.222961730449251, + "loss": 0.6726725101470947, + "loss_ce": 6.513913831440732e-05, + "loss_iou": 0.228515625, + "loss_num": 0.04296875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 117204720, + "step": 1870 + }, + { + "epoch": 6.226289517470882, + "grad_norm": 25.107208251953125, + "learning_rate": 5e-06, + "loss": 0.5835, + "num_input_tokens_seen": 117268404, + "step": 1871 + }, + { + "epoch": 6.226289517470882, + "loss": 0.5766459703445435, + "loss_ce": 0.0013285500463098288, + "loss_iou": 0.1845703125, + "loss_num": 0.041259765625, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 117268404, + "step": 1871 + }, + { + "epoch": 6.229617304492512, + "grad_norm": 15.994500160217285, + "learning_rate": 5e-06, + "loss": 0.6522, + "num_input_tokens_seen": 117331168, + "step": 1872 + }, + { + "epoch": 6.229617304492512, + "loss": 0.7275434732437134, + "loss_ce": 4.387545232020784e-06, + "loss_iou": 0.2373046875, + "loss_num": 0.05029296875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 117331168, + "step": 1872 + }, + { + "epoch": 6.232945091514143, + "grad_norm": 14.865208625793457, + "learning_rate": 5e-06, + "loss": 0.4913, + "num_input_tokens_seen": 117394412, + "step": 1873 + }, + { + "epoch": 6.232945091514143, + "loss": 0.4301111400127411, + "loss_ce": 5.7414017646806315e-05, + "loss_iou": 0.11962890625, + "loss_num": 0.0380859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 117394412, + "step": 1873 + }, + { + "epoch": 6.2362728785357735, + "grad_norm": 9.97636890411377, + "learning_rate": 5e-06, + "loss": 0.6502, + "num_input_tokens_seen": 117457176, + "step": 1874 + }, + { + "epoch": 6.2362728785357735, + "loss": 0.6446551084518433, + "loss_ce": 0.0006731529720127583, + "loss_iou": 0.212890625, + "loss_num": 0.043701171875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 117457176, + "step": 1874 + }, + { + "epoch": 6.239600665557404, + "grad_norm": 8.297738075256348, + "learning_rate": 5e-06, + "loss": 0.5623, + "num_input_tokens_seen": 117518696, + "step": 1875 + }, + { + "epoch": 6.239600665557404, + "loss": 0.5319926142692566, + "loss_ce": 0.00019328358757775277, + "loss_iou": 0.181640625, + "loss_num": 0.03369140625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 117518696, + "step": 1875 + }, + { + "epoch": 6.242928452579035, + "grad_norm": 7.489507675170898, + "learning_rate": 5e-06, + "loss": 0.5349, + "num_input_tokens_seen": 117581024, + "step": 1876 + }, + { + "epoch": 6.242928452579035, + "loss": 0.401066392660141, + "loss_ce": 4.387100489111617e-06, + "loss_iou": 0.1484375, + "loss_num": 0.020751953125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 117581024, + "step": 1876 + }, + { + "epoch": 6.246256239600665, + "grad_norm": 19.296171188354492, + "learning_rate": 5e-06, + "loss": 0.6984, + "num_input_tokens_seen": 117646404, + "step": 1877 + }, + { + "epoch": 6.246256239600665, + "loss": 0.7436515092849731, + "loss_ce": 0.0007315932889468968, + "loss_iou": 0.2890625, + "loss_num": 0.033447265625, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 117646404, + "step": 1877 + }, + { + "epoch": 6.249584026622296, + "grad_norm": 29.02984046936035, + "learning_rate": 5e-06, + "loss": 0.6827, + "num_input_tokens_seen": 117709528, + "step": 1878 + }, + { + "epoch": 6.249584026622296, + "loss": 0.7689893245697021, + "loss_ce": 7.337753686442738e-06, + "loss_iou": 0.296875, + "loss_num": 0.035400390625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 117709528, + "step": 1878 + }, + { + "epoch": 6.252911813643927, + "grad_norm": 45.192562103271484, + "learning_rate": 5e-06, + "loss": 0.6448, + "num_input_tokens_seen": 117772044, + "step": 1879 + }, + { + "epoch": 6.252911813643927, + "loss": 0.6723182201385498, + "loss_ce": 1.5984420315362513e-05, + "loss_iou": 0.26171875, + "loss_num": 0.029541015625, + "loss_xval": 0.671875, + "num_input_tokens_seen": 117772044, + "step": 1879 + }, + { + "epoch": 6.256239600665557, + "grad_norm": 29.018625259399414, + "learning_rate": 5e-06, + "loss": 0.5516, + "num_input_tokens_seen": 117834036, + "step": 1880 + }, + { + "epoch": 6.256239600665557, + "loss": 0.5316672921180725, + "loss_ce": 0.0001731569936964661, + "loss_iou": 0.171875, + "loss_num": 0.03759765625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 117834036, + "step": 1880 + }, + { + "epoch": 6.259567387687188, + "grad_norm": 23.841171264648438, + "learning_rate": 5e-06, + "loss": 0.7798, + "num_input_tokens_seen": 117895044, + "step": 1881 + }, + { + "epoch": 6.259567387687188, + "loss": 0.7895559072494507, + "loss_ce": 5.09536675963318e-06, + "loss_iou": 0.302734375, + "loss_num": 0.037109375, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 117895044, + "step": 1881 + }, + { + "epoch": 6.262895174708818, + "grad_norm": 28.532699584960938, + "learning_rate": 5e-06, + "loss": 0.5149, + "num_input_tokens_seen": 117956884, + "step": 1882 + }, + { + "epoch": 6.262895174708818, + "loss": 0.6902905702590942, + "loss_ce": 0.0007154002669267356, + "loss_iou": 0.21484375, + "loss_num": 0.052001953125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 117956884, + "step": 1882 + }, + { + "epoch": 6.266222961730449, + "grad_norm": 17.87752342224121, + "learning_rate": 5e-06, + "loss": 0.7271, + "num_input_tokens_seen": 118019364, + "step": 1883 + }, + { + "epoch": 6.266222961730449, + "loss": 0.7441762089729309, + "loss_ce": 3.5563556593842804e-05, + "loss_iou": 0.28125, + "loss_num": 0.0361328125, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 118019364, + "step": 1883 + }, + { + "epoch": 6.26955074875208, + "grad_norm": 25.544570922851562, + "learning_rate": 5e-06, + "loss": 0.6711, + "num_input_tokens_seen": 118082380, + "step": 1884 + }, + { + "epoch": 6.26955074875208, + "loss": 0.7013899087905884, + "loss_ce": 0.0003401026770006865, + "loss_iou": 0.2734375, + "loss_num": 0.030517578125, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 118082380, + "step": 1884 + }, + { + "epoch": 6.27287853577371, + "grad_norm": 21.333023071289062, + "learning_rate": 5e-06, + "loss": 0.6432, + "num_input_tokens_seen": 118145304, + "step": 1885 + }, + { + "epoch": 6.27287853577371, + "loss": 0.3440302908420563, + "loss_ce": 0.00015822664136067033, + "loss_iou": 0.12109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.34375, + "num_input_tokens_seen": 118145304, + "step": 1885 + }, + { + "epoch": 6.276206322795341, + "grad_norm": 11.410968780517578, + "learning_rate": 5e-06, + "loss": 0.8493, + "num_input_tokens_seen": 118205404, + "step": 1886 + }, + { + "epoch": 6.276206322795341, + "loss": 0.6452671885490417, + "loss_ce": 3.5297755403007613e-06, + "loss_iou": 0.2080078125, + "loss_num": 0.045654296875, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 118205404, + "step": 1886 + }, + { + "epoch": 6.2795341098169715, + "grad_norm": 15.16508674621582, + "learning_rate": 5e-06, + "loss": 0.746, + "num_input_tokens_seen": 118268188, + "step": 1887 + }, + { + "epoch": 6.2795341098169715, + "loss": 0.8573170900344849, + "loss_ce": 0.00013937031326349825, + "loss_iou": 0.2890625, + "loss_num": 0.055419921875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 118268188, + "step": 1887 + }, + { + "epoch": 6.282861896838602, + "grad_norm": 19.193706512451172, + "learning_rate": 5e-06, + "loss": 0.5648, + "num_input_tokens_seen": 118331420, + "step": 1888 + }, + { + "epoch": 6.282861896838602, + "loss": 0.5550625324249268, + "loss_ce": 0.00037507241358980536, + "loss_iou": 0.228515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.5546875, + "num_input_tokens_seen": 118331420, + "step": 1888 + }, + { + "epoch": 6.286189683860233, + "grad_norm": 10.598845481872559, + "learning_rate": 5e-06, + "loss": 0.4831, + "num_input_tokens_seen": 118394204, + "step": 1889 + }, + { + "epoch": 6.286189683860233, + "loss": 0.45692452788352966, + "loss_ce": 0.00047309871297329664, + "loss_iou": 0.14453125, + "loss_num": 0.033447265625, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 118394204, + "step": 1889 + }, + { + "epoch": 6.289517470881863, + "grad_norm": 18.339231491088867, + "learning_rate": 5e-06, + "loss": 0.3393, + "num_input_tokens_seen": 118456836, + "step": 1890 + }, + { + "epoch": 6.289517470881863, + "loss": 0.35476088523864746, + "loss_ce": 2.4531174858566374e-05, + "loss_iou": 0.125, + "loss_num": 0.0208740234375, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 118456836, + "step": 1890 + }, + { + "epoch": 6.292845257903494, + "grad_norm": 42.98812484741211, + "learning_rate": 5e-06, + "loss": 0.9685, + "num_input_tokens_seen": 118520496, + "step": 1891 + }, + { + "epoch": 6.292845257903494, + "loss": 0.9763355255126953, + "loss_ce": 0.0005359815550036728, + "loss_iou": 0.3203125, + "loss_num": 0.0673828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 118520496, + "step": 1891 + }, + { + "epoch": 6.2961730449251245, + "grad_norm": 7.72786283493042, + "learning_rate": 5e-06, + "loss": 0.5755, + "num_input_tokens_seen": 118583420, + "step": 1892 + }, + { + "epoch": 6.2961730449251245, + "loss": 0.615013599395752, + "loss_ce": 2.342194420634769e-05, + "loss_iou": 0.208984375, + "loss_num": 0.03955078125, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 118583420, + "step": 1892 + }, + { + "epoch": 6.299500831946755, + "grad_norm": 7.7686448097229, + "learning_rate": 5e-06, + "loss": 0.5277, + "num_input_tokens_seen": 118644200, + "step": 1893 + }, + { + "epoch": 6.299500831946755, + "loss": 0.6303321123123169, + "loss_ce": 8.307768439408392e-05, + "loss_iou": 0.1845703125, + "loss_num": 0.052490234375, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 118644200, + "step": 1893 + }, + { + "epoch": 6.302828618968386, + "grad_norm": 14.28267765045166, + "learning_rate": 5e-06, + "loss": 0.6133, + "num_input_tokens_seen": 118706948, + "step": 1894 + }, + { + "epoch": 6.302828618968386, + "loss": 0.6571691036224365, + "loss_ce": 0.00036982051096856594, + "loss_iou": 0.220703125, + "loss_num": 0.04296875, + "loss_xval": 0.65625, + "num_input_tokens_seen": 118706948, + "step": 1894 + }, + { + "epoch": 6.306156405990016, + "grad_norm": 14.473723411560059, + "learning_rate": 5e-06, + "loss": 0.7662, + "num_input_tokens_seen": 118770088, + "step": 1895 + }, + { + "epoch": 6.306156405990016, + "loss": 0.6408983469009399, + "loss_ce": 2.9183007427491248e-05, + "loss_iou": 0.19140625, + "loss_num": 0.052001953125, + "loss_xval": 0.640625, + "num_input_tokens_seen": 118770088, + "step": 1895 + }, + { + "epoch": 6.309484193011647, + "grad_norm": 11.187668800354004, + "learning_rate": 5e-06, + "loss": 0.8763, + "num_input_tokens_seen": 118833268, + "step": 1896 + }, + { + "epoch": 6.309484193011647, + "loss": 0.8932377099990845, + "loss_ce": 0.00029332979465834796, + "loss_iou": 0.322265625, + "loss_num": 0.04931640625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 118833268, + "step": 1896 + }, + { + "epoch": 6.312811980033278, + "grad_norm": 20.483861923217773, + "learning_rate": 5e-06, + "loss": 0.62, + "num_input_tokens_seen": 118895864, + "step": 1897 + }, + { + "epoch": 6.312811980033278, + "loss": 0.6993443965911865, + "loss_ce": 0.0007360352901741862, + "loss_iou": 0.224609375, + "loss_num": 0.0498046875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 118895864, + "step": 1897 + }, + { + "epoch": 6.316139767054908, + "grad_norm": 8.918088912963867, + "learning_rate": 5e-06, + "loss": 0.5872, + "num_input_tokens_seen": 118958404, + "step": 1898 + }, + { + "epoch": 6.316139767054908, + "loss": 0.6274863481521606, + "loss_ce": 0.0011435841443017125, + "loss_iou": 0.1982421875, + "loss_num": 0.046142578125, + "loss_xval": 0.625, + "num_input_tokens_seen": 118958404, + "step": 1898 + }, + { + "epoch": 6.319467554076539, + "grad_norm": 13.76313591003418, + "learning_rate": 5e-06, + "loss": 0.7277, + "num_input_tokens_seen": 119021880, + "step": 1899 + }, + { + "epoch": 6.319467554076539, + "loss": 0.6522097587585449, + "loss_ce": 0.0009646408725529909, + "loss_iou": 0.2255859375, + "loss_num": 0.0400390625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 119021880, + "step": 1899 + }, + { + "epoch": 6.322795341098169, + "grad_norm": 8.077730178833008, + "learning_rate": 5e-06, + "loss": 0.5365, + "num_input_tokens_seen": 119085200, + "step": 1900 + }, + { + "epoch": 6.322795341098169, + "loss": 0.3900187015533447, + "loss_ce": 4.0382278712058906e-06, + "loss_iou": 0.11328125, + "loss_num": 0.03271484375, + "loss_xval": 0.390625, + "num_input_tokens_seen": 119085200, + "step": 1900 + }, + { + "epoch": 6.3261231281198, + "grad_norm": 20.717100143432617, + "learning_rate": 5e-06, + "loss": 0.662, + "num_input_tokens_seen": 119149340, + "step": 1901 + }, + { + "epoch": 6.3261231281198, + "loss": 0.5592089891433716, + "loss_ce": 0.0011035435600206256, + "loss_iou": 0.2021484375, + "loss_num": 0.0308837890625, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 119149340, + "step": 1901 + }, + { + "epoch": 6.329450915141431, + "grad_norm": 11.843432426452637, + "learning_rate": 5e-06, + "loss": 0.5203, + "num_input_tokens_seen": 119209956, + "step": 1902 + }, + { + "epoch": 6.329450915141431, + "loss": 0.6044921875, + "loss_ce": 0.0002441465330775827, + "loss_iou": 0.23046875, + "loss_num": 0.02880859375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 119209956, + "step": 1902 + }, + { + "epoch": 6.332778702163061, + "grad_norm": 11.223464012145996, + "learning_rate": 5e-06, + "loss": 0.6212, + "num_input_tokens_seen": 119269532, + "step": 1903 + }, + { + "epoch": 6.332778702163061, + "loss": 0.6058506369590759, + "loss_ce": 1.5660873032175004e-05, + "loss_iou": 0.166015625, + "loss_num": 0.0546875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 119269532, + "step": 1903 + }, + { + "epoch": 6.336106489184692, + "grad_norm": 27.761043548583984, + "learning_rate": 5e-06, + "loss": 0.8632, + "num_input_tokens_seen": 119333156, + "step": 1904 + }, + { + "epoch": 6.336106489184692, + "loss": 0.6953175067901611, + "loss_ce": 5.025212885811925e-06, + "loss_iou": 0.2431640625, + "loss_num": 0.0419921875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 119333156, + "step": 1904 + }, + { + "epoch": 6.3394342762063225, + "grad_norm": 22.749303817749023, + "learning_rate": 5e-06, + "loss": 0.5647, + "num_input_tokens_seen": 119394532, + "step": 1905 + }, + { + "epoch": 6.3394342762063225, + "loss": 0.5662988424301147, + "loss_ce": 0.001113282167352736, + "loss_iou": 0.203125, + "loss_num": 0.03173828125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 119394532, + "step": 1905 + }, + { + "epoch": 6.342762063227953, + "grad_norm": 11.654192924499512, + "learning_rate": 5e-06, + "loss": 0.6636, + "num_input_tokens_seen": 119457736, + "step": 1906 + }, + { + "epoch": 6.342762063227953, + "loss": 0.8046139478683472, + "loss_ce": 0.0006588406395167112, + "loss_iou": 0.28515625, + "loss_num": 0.04638671875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 119457736, + "step": 1906 + }, + { + "epoch": 6.346089850249584, + "grad_norm": 25.141996383666992, + "learning_rate": 5e-06, + "loss": 0.6719, + "num_input_tokens_seen": 119521436, + "step": 1907 + }, + { + "epoch": 6.346089850249584, + "loss": 0.4867073893547058, + "loss_ce": 1.305484965996584e-05, + "loss_iou": 0.1826171875, + "loss_num": 0.0244140625, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 119521436, + "step": 1907 + }, + { + "epoch": 6.349417637271214, + "grad_norm": 8.515308380126953, + "learning_rate": 5e-06, + "loss": 0.5758, + "num_input_tokens_seen": 119583924, + "step": 1908 + }, + { + "epoch": 6.349417637271214, + "loss": 0.5766646862030029, + "loss_ce": 4.5053284338791855e-06, + "loss_iou": 0.23828125, + "loss_num": 0.02001953125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 119583924, + "step": 1908 + }, + { + "epoch": 6.352745424292845, + "grad_norm": 25.861650466918945, + "learning_rate": 5e-06, + "loss": 0.5609, + "num_input_tokens_seen": 119644324, + "step": 1909 + }, + { + "epoch": 6.352745424292845, + "loss": 0.5296666026115417, + "loss_ce": 3.480890427454142e-06, + "loss_iou": 0.201171875, + "loss_num": 0.025390625, + "loss_xval": 0.53125, + "num_input_tokens_seen": 119644324, + "step": 1909 + }, + { + "epoch": 6.356073211314476, + "grad_norm": 17.014137268066406, + "learning_rate": 5e-06, + "loss": 0.4881, + "num_input_tokens_seen": 119707332, + "step": 1910 + }, + { + "epoch": 6.356073211314476, + "loss": 0.32322195172309875, + "loss_ce": 0.00010183690028497949, + "loss_iou": 0.1259765625, + "loss_num": 0.01434326171875, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 119707332, + "step": 1910 + }, + { + "epoch": 6.359400998336106, + "grad_norm": 42.42847442626953, + "learning_rate": 5e-06, + "loss": 0.6808, + "num_input_tokens_seen": 119770608, + "step": 1911 + }, + { + "epoch": 6.359400998336106, + "loss": 0.4963526129722595, + "loss_ce": 1.4706164620292839e-05, + "loss_iou": 0.14453125, + "loss_num": 0.041748046875, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 119770608, + "step": 1911 + }, + { + "epoch": 6.362728785357737, + "grad_norm": 90.03009033203125, + "learning_rate": 5e-06, + "loss": 0.6118, + "num_input_tokens_seen": 119832564, + "step": 1912 + }, + { + "epoch": 6.362728785357737, + "loss": 0.5688121914863586, + "loss_ce": 1.0294821549905464e-05, + "loss_iou": 0.208984375, + "loss_num": 0.030029296875, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 119832564, + "step": 1912 + }, + { + "epoch": 6.366056572379367, + "grad_norm": 21.129972457885742, + "learning_rate": 5e-06, + "loss": 0.7614, + "num_input_tokens_seen": 119895428, + "step": 1913 + }, + { + "epoch": 6.366056572379367, + "loss": 0.6494866609573364, + "loss_ce": 0.0001946888369275257, + "loss_iou": 0.203125, + "loss_num": 0.048828125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 119895428, + "step": 1913 + }, + { + "epoch": 6.369384359400998, + "grad_norm": 23.620243072509766, + "learning_rate": 5e-06, + "loss": 0.8151, + "num_input_tokens_seen": 119958696, + "step": 1914 + }, + { + "epoch": 6.369384359400998, + "loss": 0.8362605571746826, + "loss_ce": 7.890413689892739e-05, + "loss_iou": 0.30078125, + "loss_num": 0.047119140625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 119958696, + "step": 1914 + }, + { + "epoch": 6.372712146422629, + "grad_norm": 16.94724464416504, + "learning_rate": 5e-06, + "loss": 0.552, + "num_input_tokens_seen": 120021096, + "step": 1915 + }, + { + "epoch": 6.372712146422629, + "loss": 0.6032967567443848, + "loss_ce": 0.0002694588038139045, + "loss_iou": 0.2392578125, + "loss_num": 0.0247802734375, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 120021096, + "step": 1915 + }, + { + "epoch": 6.376039933444259, + "grad_norm": 15.850129127502441, + "learning_rate": 5e-06, + "loss": 0.5145, + "num_input_tokens_seen": 120081864, + "step": 1916 + }, + { + "epoch": 6.376039933444259, + "loss": 0.4399433135986328, + "loss_ce": 1.9184694792784285e-06, + "loss_iou": 0.140625, + "loss_num": 0.03173828125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 120081864, + "step": 1916 + }, + { + "epoch": 6.37936772046589, + "grad_norm": 10.974051475524902, + "learning_rate": 5e-06, + "loss": 0.7344, + "num_input_tokens_seen": 120145864, + "step": 1917 + }, + { + "epoch": 6.37936772046589, + "loss": 0.6459264755249023, + "loss_ce": 0.0019140413496643305, + "loss_iou": 0.2490234375, + "loss_num": 0.029052734375, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 120145864, + "step": 1917 + }, + { + "epoch": 6.3826955074875205, + "grad_norm": 22.09915542602539, + "learning_rate": 5e-06, + "loss": 0.5428, + "num_input_tokens_seen": 120208220, + "step": 1918 + }, + { + "epoch": 6.3826955074875205, + "loss": 0.6734851598739624, + "loss_ce": 0.0007557276985608041, + "loss_iou": 0.1611328125, + "loss_num": 0.0703125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 120208220, + "step": 1918 + }, + { + "epoch": 6.386023294509151, + "grad_norm": 17.634904861450195, + "learning_rate": 5e-06, + "loss": 0.7517, + "num_input_tokens_seen": 120269832, + "step": 1919 + }, + { + "epoch": 6.386023294509151, + "loss": 0.6596292853355408, + "loss_ce": 8.335949678439647e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.035400390625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 120269832, + "step": 1919 + }, + { + "epoch": 6.389351081530782, + "grad_norm": 16.73068618774414, + "learning_rate": 5e-06, + "loss": 0.4661, + "num_input_tokens_seen": 120329824, + "step": 1920 + }, + { + "epoch": 6.389351081530782, + "loss": 0.5087909698486328, + "loss_ce": 1.963873046406661e-06, + "loss_iou": 0.13671875, + "loss_num": 0.046875, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 120329824, + "step": 1920 + }, + { + "epoch": 6.392678868552412, + "grad_norm": 26.34819984436035, + "learning_rate": 5e-06, + "loss": 0.6769, + "num_input_tokens_seen": 120392212, + "step": 1921 + }, + { + "epoch": 6.392678868552412, + "loss": 0.42969781160354614, + "loss_ce": 0.000803784467279911, + "loss_iou": 0.1328125, + "loss_num": 0.032958984375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 120392212, + "step": 1921 + }, + { + "epoch": 6.396006655574043, + "grad_norm": 6.768531799316406, + "learning_rate": 5e-06, + "loss": 0.6683, + "num_input_tokens_seen": 120454584, + "step": 1922 + }, + { + "epoch": 6.396006655574043, + "loss": 0.9003890752792358, + "loss_ce": 0.00024259740894194692, + "loss_iou": 0.318359375, + "loss_num": 0.052734375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 120454584, + "step": 1922 + }, + { + "epoch": 6.3993344425956735, + "grad_norm": 8.30113697052002, + "learning_rate": 5e-06, + "loss": 0.6405, + "num_input_tokens_seen": 120514044, + "step": 1923 + }, + { + "epoch": 6.3993344425956735, + "loss": 0.7188661098480225, + "loss_ce": 0.00011609155626501888, + "loss_iou": 0.1796875, + "loss_num": 0.07177734375, + "loss_xval": 0.71875, + "num_input_tokens_seen": 120514044, + "step": 1923 + }, + { + "epoch": 6.402662229617304, + "grad_norm": 15.520536422729492, + "learning_rate": 5e-06, + "loss": 0.7244, + "num_input_tokens_seen": 120576424, + "step": 1924 + }, + { + "epoch": 6.402662229617304, + "loss": 0.5446481108665466, + "loss_ce": 0.00033657567109912634, + "loss_iou": 0.1962890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 120576424, + "step": 1924 + }, + { + "epoch": 6.405990016638935, + "grad_norm": 12.706613540649414, + "learning_rate": 5e-06, + "loss": 0.5809, + "num_input_tokens_seen": 120638852, + "step": 1925 + }, + { + "epoch": 6.405990016638935, + "loss": 0.6418596506118774, + "loss_ce": 0.0006242538802325726, + "loss_iou": 0.224609375, + "loss_num": 0.03857421875, + "loss_xval": 0.640625, + "num_input_tokens_seen": 120638852, + "step": 1925 + }, + { + "epoch": 6.409317803660565, + "grad_norm": 12.72490119934082, + "learning_rate": 5e-06, + "loss": 0.5454, + "num_input_tokens_seen": 120701328, + "step": 1926 + }, + { + "epoch": 6.409317803660565, + "loss": 0.5837253332138062, + "loss_ce": 0.0001071855440386571, + "loss_iou": 0.220703125, + "loss_num": 0.02880859375, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 120701328, + "step": 1926 + }, + { + "epoch": 6.412645590682196, + "grad_norm": 15.018410682678223, + "learning_rate": 5e-06, + "loss": 0.5549, + "num_input_tokens_seen": 120763848, + "step": 1927 + }, + { + "epoch": 6.412645590682196, + "loss": 0.5490936040878296, + "loss_ce": 0.0002655147691257298, + "loss_iou": 0.1669921875, + "loss_num": 0.043212890625, + "loss_xval": 0.546875, + "num_input_tokens_seen": 120763848, + "step": 1927 + }, + { + "epoch": 6.415973377703827, + "grad_norm": 14.566852569580078, + "learning_rate": 5e-06, + "loss": 0.8448, + "num_input_tokens_seen": 120826228, + "step": 1928 + }, + { + "epoch": 6.415973377703827, + "loss": 1.2861429452896118, + "loss_ce": 1.0124242180609144e-05, + "loss_iou": 0.4453125, + "loss_num": 0.07861328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 120826228, + "step": 1928 + }, + { + "epoch": 6.419301164725457, + "grad_norm": 11.734906196594238, + "learning_rate": 5e-06, + "loss": 0.584, + "num_input_tokens_seen": 120887768, + "step": 1929 + }, + { + "epoch": 6.419301164725457, + "loss": 0.5992169976234436, + "loss_ce": 0.0011334889568388462, + "loss_iou": 0.19921875, + "loss_num": 0.0400390625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 120887768, + "step": 1929 + }, + { + "epoch": 6.422628951747088, + "grad_norm": 15.214763641357422, + "learning_rate": 5e-06, + "loss": 0.6457, + "num_input_tokens_seen": 120950624, + "step": 1930 + }, + { + "epoch": 6.422628951747088, + "loss": 0.8110338449478149, + "loss_ce": 0.00018182062194682658, + "loss_iou": 0.279296875, + "loss_num": 0.050048828125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 120950624, + "step": 1930 + }, + { + "epoch": 6.425956738768718, + "grad_norm": 28.60011863708496, + "learning_rate": 5e-06, + "loss": 0.6793, + "num_input_tokens_seen": 121013536, + "step": 1931 + }, + { + "epoch": 6.425956738768718, + "loss": 0.49274346232414246, + "loss_ce": 0.0004949223366566002, + "loss_iou": 0.1591796875, + "loss_num": 0.034912109375, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 121013536, + "step": 1931 + }, + { + "epoch": 6.429284525790349, + "grad_norm": 12.213953971862793, + "learning_rate": 5e-06, + "loss": 0.7479, + "num_input_tokens_seen": 121075756, + "step": 1932 + }, + { + "epoch": 6.429284525790349, + "loss": 0.773937463760376, + "loss_ce": 1.1653934961941559e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.064453125, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 121075756, + "step": 1932 + }, + { + "epoch": 6.43261231281198, + "grad_norm": 15.571845054626465, + "learning_rate": 5e-06, + "loss": 0.6698, + "num_input_tokens_seen": 121139652, + "step": 1933 + }, + { + "epoch": 6.43261231281198, + "loss": 0.6814624071121216, + "loss_ce": 0.0027514593675732613, + "loss_iou": 0.27734375, + "loss_num": 0.02490234375, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 121139652, + "step": 1933 + }, + { + "epoch": 6.43594009983361, + "grad_norm": 13.807389259338379, + "learning_rate": 5e-06, + "loss": 0.9253, + "num_input_tokens_seen": 121203024, + "step": 1934 + }, + { + "epoch": 6.43594009983361, + "loss": 1.345597505569458, + "loss_ce": 0.0008709065150469542, + "loss_iou": 0.466796875, + "loss_num": 0.08251953125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 121203024, + "step": 1934 + }, + { + "epoch": 6.439267886855241, + "grad_norm": 12.07575511932373, + "learning_rate": 5e-06, + "loss": 0.5334, + "num_input_tokens_seen": 121266420, + "step": 1935 + }, + { + "epoch": 6.439267886855241, + "loss": 0.5801987648010254, + "loss_ce": 0.00012065586633980274, + "loss_iou": 0.1796875, + "loss_num": 0.044189453125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 121266420, + "step": 1935 + }, + { + "epoch": 6.4425956738768715, + "grad_norm": 12.738320350646973, + "learning_rate": 5e-06, + "loss": 0.7129, + "num_input_tokens_seen": 121330352, + "step": 1936 + }, + { + "epoch": 6.4425956738768715, + "loss": 0.656618595123291, + "loss_ce": 0.00012446939945220947, + "loss_iou": 0.2197265625, + "loss_num": 0.04345703125, + "loss_xval": 0.65625, + "num_input_tokens_seen": 121330352, + "step": 1936 + }, + { + "epoch": 6.445923460898502, + "grad_norm": 14.938615798950195, + "learning_rate": 5e-06, + "loss": 0.6309, + "num_input_tokens_seen": 121394128, + "step": 1937 + }, + { + "epoch": 6.445923460898502, + "loss": 0.8611018657684326, + "loss_ce": 0.0016048324760049582, + "loss_iou": 0.30859375, + "loss_num": 0.048828125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 121394128, + "step": 1937 + }, + { + "epoch": 6.449251247920133, + "grad_norm": 21.565048217773438, + "learning_rate": 5e-06, + "loss": 0.5475, + "num_input_tokens_seen": 121457512, + "step": 1938 + }, + { + "epoch": 6.449251247920133, + "loss": 0.39604151248931885, + "loss_ce": 4.542524766293354e-05, + "loss_iou": 0.126953125, + "loss_num": 0.028564453125, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 121457512, + "step": 1938 + }, + { + "epoch": 6.452579034941763, + "grad_norm": 35.06698989868164, + "learning_rate": 5e-06, + "loss": 0.8538, + "num_input_tokens_seen": 121520800, + "step": 1939 + }, + { + "epoch": 6.452579034941763, + "loss": 0.8477818965911865, + "loss_ce": 3.5593729990068823e-06, + "loss_iou": 0.265625, + "loss_num": 0.0634765625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 121520800, + "step": 1939 + }, + { + "epoch": 6.455906821963394, + "grad_norm": 45.28481674194336, + "learning_rate": 5e-06, + "loss": 0.97, + "num_input_tokens_seen": 121585016, + "step": 1940 + }, + { + "epoch": 6.455906821963394, + "loss": 0.9006417989730835, + "loss_ce": 7.0053438321338035e-06, + "loss_iou": 0.306640625, + "loss_num": 0.05810546875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 121585016, + "step": 1940 + }, + { + "epoch": 6.4592346089850246, + "grad_norm": 39.1356086730957, + "learning_rate": 5e-06, + "loss": 0.6982, + "num_input_tokens_seen": 121647252, + "step": 1941 + }, + { + "epoch": 6.4592346089850246, + "loss": 0.6142480969429016, + "loss_ce": 0.0001123529946198687, + "loss_iou": 0.212890625, + "loss_num": 0.03759765625, + "loss_xval": 0.61328125, + "num_input_tokens_seen": 121647252, + "step": 1941 + }, + { + "epoch": 6.462562396006655, + "grad_norm": 37.34075164794922, + "learning_rate": 5e-06, + "loss": 0.6763, + "num_input_tokens_seen": 121710464, + "step": 1942 + }, + { + "epoch": 6.462562396006655, + "loss": 0.7514750957489014, + "loss_ce": 1.022259584715357e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0299072265625, + "loss_xval": 0.75, + "num_input_tokens_seen": 121710464, + "step": 1942 + }, + { + "epoch": 6.465890183028286, + "grad_norm": 28.683731079101562, + "learning_rate": 5e-06, + "loss": 0.5492, + "num_input_tokens_seen": 121771684, + "step": 1943 + }, + { + "epoch": 6.465890183028286, + "loss": 0.34131908416748047, + "loss_ce": 0.0007429145043715835, + "loss_iou": 0.080078125, + "loss_num": 0.0361328125, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 121771684, + "step": 1943 + }, + { + "epoch": 6.469217970049916, + "grad_norm": 16.91063690185547, + "learning_rate": 5e-06, + "loss": 0.6058, + "num_input_tokens_seen": 121836040, + "step": 1944 + }, + { + "epoch": 6.469217970049916, + "loss": 0.6735157370567322, + "loss_ce": 0.0002979549753945321, + "loss_iou": 0.28125, + "loss_num": 0.0223388671875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 121836040, + "step": 1944 + }, + { + "epoch": 6.472545757071547, + "grad_norm": 20.4154109954834, + "learning_rate": 5e-06, + "loss": 0.6285, + "num_input_tokens_seen": 121895316, + "step": 1945 + }, + { + "epoch": 6.472545757071547, + "loss": 0.8378450274467468, + "loss_ce": 7.650956104043871e-05, + "loss_iou": 0.2470703125, + "loss_num": 0.06884765625, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 121895316, + "step": 1945 + }, + { + "epoch": 6.475873544093178, + "grad_norm": 42.6540412902832, + "learning_rate": 5e-06, + "loss": 0.7617, + "num_input_tokens_seen": 121958684, + "step": 1946 + }, + { + "epoch": 6.475873544093178, + "loss": 1.069746732711792, + "loss_ce": 0.0015093846013769507, + "loss_iou": 0.38671875, + "loss_num": 0.05859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 121958684, + "step": 1946 + }, + { + "epoch": 6.479201331114808, + "grad_norm": 15.946682929992676, + "learning_rate": 5e-06, + "loss": 0.7402, + "num_input_tokens_seen": 122020744, + "step": 1947 + }, + { + "epoch": 6.479201331114808, + "loss": 0.6518175601959229, + "loss_ce": 0.00026731760590337217, + "loss_iou": 0.154296875, + "loss_num": 0.06884765625, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 122020744, + "step": 1947 + }, + { + "epoch": 6.482529118136439, + "grad_norm": 10.553583145141602, + "learning_rate": 5e-06, + "loss": 0.5386, + "num_input_tokens_seen": 122083412, + "step": 1948 + }, + { + "epoch": 6.482529118136439, + "loss": 0.7277462482452393, + "loss_ce": 0.00020720537577290088, + "loss_iou": 0.298828125, + "loss_num": 0.0257568359375, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 122083412, + "step": 1948 + }, + { + "epoch": 6.4858569051580695, + "grad_norm": 11.314140319824219, + "learning_rate": 5e-06, + "loss": 0.7428, + "num_input_tokens_seen": 122145288, + "step": 1949 + }, + { + "epoch": 6.4858569051580695, + "loss": 0.7091948986053467, + "loss_ce": 8.838271605782211e-05, + "loss_iou": 0.2578125, + "loss_num": 0.038818359375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 122145288, + "step": 1949 + }, + { + "epoch": 6.4891846921797, + "grad_norm": 67.86624145507812, + "learning_rate": 5e-06, + "loss": 0.7234, + "num_input_tokens_seen": 122208008, + "step": 1950 + }, + { + "epoch": 6.4891846921797, + "loss": 0.6662644147872925, + "loss_ce": 4.646051820600405e-06, + "loss_iou": 0.263671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 122208008, + "step": 1950 + }, + { + "epoch": 6.492512479201331, + "grad_norm": 15.304253578186035, + "learning_rate": 5e-06, + "loss": 0.5958, + "num_input_tokens_seen": 122268396, + "step": 1951 + }, + { + "epoch": 6.492512479201331, + "loss": 0.680047869682312, + "loss_ce": 0.00011620179429883137, + "loss_iou": 0.19140625, + "loss_num": 0.0595703125, + "loss_xval": 0.6796875, + "num_input_tokens_seen": 122268396, + "step": 1951 + }, + { + "epoch": 6.495840266222961, + "grad_norm": 13.426730155944824, + "learning_rate": 5e-06, + "loss": 0.6756, + "num_input_tokens_seen": 122331456, + "step": 1952 + }, + { + "epoch": 6.495840266222961, + "loss": 0.5398004055023193, + "loss_ce": 5.481046173372306e-06, + "loss_iou": 0.1591796875, + "loss_num": 0.04443359375, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 122331456, + "step": 1952 + }, + { + "epoch": 6.499168053244592, + "grad_norm": 6.796942710876465, + "learning_rate": 5e-06, + "loss": 0.5479, + "num_input_tokens_seen": 122395172, + "step": 1953 + }, + { + "epoch": 6.499168053244592, + "loss": 0.40055206418037415, + "loss_ce": 0.0006497133290395141, + "loss_iou": 0.13671875, + "loss_num": 0.0252685546875, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 122395172, + "step": 1953 + }, + { + "epoch": 6.5024958402662225, + "grad_norm": 8.822606086730957, + "learning_rate": 5e-06, + "loss": 0.6339, + "num_input_tokens_seen": 122456692, + "step": 1954 + }, + { + "epoch": 6.5024958402662225, + "loss": 0.6271539330482483, + "loss_ce": 0.0003228959976695478, + "loss_iou": 0.203125, + "loss_num": 0.0439453125, + "loss_xval": 0.625, + "num_input_tokens_seen": 122456692, + "step": 1954 + }, + { + "epoch": 6.505823627287853, + "grad_norm": 14.753844261169434, + "learning_rate": 5e-06, + "loss": 0.7122, + "num_input_tokens_seen": 122517856, + "step": 1955 + }, + { + "epoch": 6.505823627287853, + "loss": 0.7032543420791626, + "loss_ce": 0.00012933007383253425, + "loss_iou": 0.15234375, + "loss_num": 0.080078125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 122517856, + "step": 1955 + }, + { + "epoch": 6.509151414309484, + "grad_norm": 15.91337776184082, + "learning_rate": 5e-06, + "loss": 0.5987, + "num_input_tokens_seen": 122581004, + "step": 1956 + }, + { + "epoch": 6.509151414309484, + "loss": 0.6297885179519653, + "loss_ce": 0.00014987353642936796, + "loss_iou": 0.236328125, + "loss_num": 0.031494140625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 122581004, + "step": 1956 + }, + { + "epoch": 6.512479201331114, + "grad_norm": 9.528816223144531, + "learning_rate": 5e-06, + "loss": 0.6616, + "num_input_tokens_seen": 122644628, + "step": 1957 + }, + { + "epoch": 6.512479201331114, + "loss": 0.6756832003593445, + "loss_ce": 0.00039023166755214334, + "loss_iou": 0.2314453125, + "loss_num": 0.04248046875, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 122644628, + "step": 1957 + }, + { + "epoch": 6.515806988352745, + "grad_norm": 27.174596786499023, + "learning_rate": 5e-06, + "loss": 0.6126, + "num_input_tokens_seen": 122708080, + "step": 1958 + }, + { + "epoch": 6.515806988352745, + "loss": 0.6474884152412415, + "loss_ce": 0.00027161132311448455, + "loss_iou": 0.251953125, + "loss_num": 0.028564453125, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 122708080, + "step": 1958 + }, + { + "epoch": 6.519134775374376, + "grad_norm": 19.91985321044922, + "learning_rate": 5e-06, + "loss": 0.5484, + "num_input_tokens_seen": 122770044, + "step": 1959 + }, + { + "epoch": 6.519134775374376, + "loss": 0.693800687789917, + "loss_ce": 1.4089351680013351e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0267333984375, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 122770044, + "step": 1959 + }, + { + "epoch": 6.522462562396006, + "grad_norm": 8.022729873657227, + "learning_rate": 5e-06, + "loss": 0.576, + "num_input_tokens_seen": 122833492, + "step": 1960 + }, + { + "epoch": 6.522462562396006, + "loss": 0.7717412114143372, + "loss_ce": 0.00013476284220814705, + "loss_iou": 0.2890625, + "loss_num": 0.0390625, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 122833492, + "step": 1960 + }, + { + "epoch": 6.525790349417637, + "grad_norm": 18.420011520385742, + "learning_rate": 5e-06, + "loss": 0.5904, + "num_input_tokens_seen": 122896032, + "step": 1961 + }, + { + "epoch": 6.525790349417637, + "loss": 0.7451998591423035, + "loss_ce": 2.1647940229740925e-05, + "loss_iou": 0.267578125, + "loss_num": 0.041748046875, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 122896032, + "step": 1961 + }, + { + "epoch": 6.529118136439267, + "grad_norm": 14.324840545654297, + "learning_rate": 5e-06, + "loss": 0.6027, + "num_input_tokens_seen": 122957832, + "step": 1962 + }, + { + "epoch": 6.529118136439267, + "loss": 0.6533394455909729, + "loss_ce": 0.0006295054336078465, + "loss_iou": 0.220703125, + "loss_num": 0.04248046875, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 122957832, + "step": 1962 + }, + { + "epoch": 6.532445923460898, + "grad_norm": 11.380367279052734, + "learning_rate": 5e-06, + "loss": 0.6411, + "num_input_tokens_seen": 123019992, + "step": 1963 + }, + { + "epoch": 6.532445923460898, + "loss": 0.6608841419219971, + "loss_ce": 0.00048379399231635034, + "loss_iou": 0.224609375, + "loss_num": 0.042236328125, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 123019992, + "step": 1963 + }, + { + "epoch": 6.535773710482529, + "grad_norm": 21.338104248046875, + "learning_rate": 5e-06, + "loss": 0.6764, + "num_input_tokens_seen": 123083924, + "step": 1964 + }, + { + "epoch": 6.535773710482529, + "loss": 0.6724074482917786, + "loss_ce": 4.4186064769746736e-05, + "loss_iou": 0.201171875, + "loss_num": 0.05419921875, + "loss_xval": 0.671875, + "num_input_tokens_seen": 123083924, + "step": 1964 + }, + { + "epoch": 6.539101497504159, + "grad_norm": 22.636884689331055, + "learning_rate": 5e-06, + "loss": 0.6027, + "num_input_tokens_seen": 123144872, + "step": 1965 + }, + { + "epoch": 6.539101497504159, + "loss": 0.6550688147544861, + "loss_ce": 0.0002073537471005693, + "loss_iou": 0.2158203125, + "loss_num": 0.04443359375, + "loss_xval": 0.65625, + "num_input_tokens_seen": 123144872, + "step": 1965 + }, + { + "epoch": 6.54242928452579, + "grad_norm": 56.49250411987305, + "learning_rate": 5e-06, + "loss": 0.5544, + "num_input_tokens_seen": 123206232, + "step": 1966 + }, + { + "epoch": 6.54242928452579, + "loss": 0.5780704617500305, + "loss_ce": 6.4996402215911075e-06, + "loss_iou": 0.126953125, + "loss_num": 0.064453125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 123206232, + "step": 1966 + }, + { + "epoch": 6.5457570715474205, + "grad_norm": 10.766104698181152, + "learning_rate": 5e-06, + "loss": 0.4923, + "num_input_tokens_seen": 123268996, + "step": 1967 + }, + { + "epoch": 6.5457570715474205, + "loss": 0.5199794769287109, + "loss_ce": 0.00032618455588817596, + "loss_iou": 0.1640625, + "loss_num": 0.038330078125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 123268996, + "step": 1967 + }, + { + "epoch": 6.549084858569051, + "grad_norm": 11.645246505737305, + "learning_rate": 5e-06, + "loss": 0.653, + "num_input_tokens_seen": 123334080, + "step": 1968 + }, + { + "epoch": 6.549084858569051, + "loss": 0.7981342673301697, + "loss_ce": 0.0012592482380568981, + "loss_iou": 0.2890625, + "loss_num": 0.043701171875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 123334080, + "step": 1968 + }, + { + "epoch": 6.552412645590682, + "grad_norm": 8.64057731628418, + "learning_rate": 5e-06, + "loss": 0.3998, + "num_input_tokens_seen": 123396060, + "step": 1969 + }, + { + "epoch": 6.552412645590682, + "loss": 0.4468684196472168, + "loss_ce": 9.108871017815545e-05, + "loss_iou": 0.1064453125, + "loss_num": 0.046875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 123396060, + "step": 1969 + }, + { + "epoch": 6.555740432612312, + "grad_norm": 24.30341339111328, + "learning_rate": 5e-06, + "loss": 0.7164, + "num_input_tokens_seen": 123460000, + "step": 1970 + }, + { + "epoch": 6.555740432612312, + "loss": 0.6704313158988953, + "loss_ce": 0.0004484155506361276, + "loss_iou": 0.25390625, + "loss_num": 0.032470703125, + "loss_xval": 0.671875, + "num_input_tokens_seen": 123460000, + "step": 1970 + }, + { + "epoch": 6.559068219633943, + "grad_norm": 39.814449310302734, + "learning_rate": 5e-06, + "loss": 0.9853, + "num_input_tokens_seen": 123523824, + "step": 1971 + }, + { + "epoch": 6.559068219633943, + "loss": 1.0057671070098877, + "loss_ce": 0.0018607999663800001, + "loss_iou": 0.365234375, + "loss_num": 0.054443359375, + "loss_xval": 1.0, + "num_input_tokens_seen": 123523824, + "step": 1971 + }, + { + "epoch": 6.5623960066555735, + "grad_norm": 27.571672439575195, + "learning_rate": 5e-06, + "loss": 0.7632, + "num_input_tokens_seen": 123587972, + "step": 1972 + }, + { + "epoch": 6.5623960066555735, + "loss": 0.6259521245956421, + "loss_ce": 3.6633864510804415e-05, + "loss_iou": 0.208984375, + "loss_num": 0.041748046875, + "loss_xval": 0.625, + "num_input_tokens_seen": 123587972, + "step": 1972 + }, + { + "epoch": 6.565723793677205, + "grad_norm": 34.187774658203125, + "learning_rate": 5e-06, + "loss": 0.7046, + "num_input_tokens_seen": 123650356, + "step": 1973 + }, + { + "epoch": 6.565723793677205, + "loss": 0.6025407314300537, + "loss_ce": 1.6855144622240914e-06, + "loss_iou": 0.203125, + "loss_num": 0.03955078125, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 123650356, + "step": 1973 + }, + { + "epoch": 6.569051580698836, + "grad_norm": 20.649272918701172, + "learning_rate": 5e-06, + "loss": 0.6901, + "num_input_tokens_seen": 123713788, + "step": 1974 + }, + { + "epoch": 6.569051580698836, + "loss": 0.7274590730667114, + "loss_ce": 0.0004083071544300765, + "loss_iou": 0.26953125, + "loss_num": 0.037353515625, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 123713788, + "step": 1974 + }, + { + "epoch": 6.572379367720466, + "grad_norm": 13.874034881591797, + "learning_rate": 5e-06, + "loss": 0.6211, + "num_input_tokens_seen": 123777400, + "step": 1975 + }, + { + "epoch": 6.572379367720466, + "loss": 0.5335060358047485, + "loss_ce": 0.000791211670730263, + "loss_iou": 0.1982421875, + "loss_num": 0.02734375, + "loss_xval": 0.53125, + "num_input_tokens_seen": 123777400, + "step": 1975 + }, + { + "epoch": 6.575707154742097, + "grad_norm": 9.80647087097168, + "learning_rate": 5e-06, + "loss": 0.6299, + "num_input_tokens_seen": 123840460, + "step": 1976 + }, + { + "epoch": 6.575707154742097, + "loss": 0.4524020254611969, + "loss_ce": 9.460385626880452e-06, + "loss_iou": 0.12890625, + "loss_num": 0.038818359375, + "loss_xval": 0.453125, + "num_input_tokens_seen": 123840460, + "step": 1976 + }, + { + "epoch": 6.5790349417637275, + "grad_norm": 29.99645233154297, + "learning_rate": 5e-06, + "loss": 0.6651, + "num_input_tokens_seen": 123901060, + "step": 1977 + }, + { + "epoch": 6.5790349417637275, + "loss": 0.8397188186645508, + "loss_ce": 0.0008516378584317863, + "loss_iou": 0.251953125, + "loss_num": 0.06640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 123901060, + "step": 1977 + }, + { + "epoch": 6.582362728785358, + "grad_norm": 48.624412536621094, + "learning_rate": 5e-06, + "loss": 0.5813, + "num_input_tokens_seen": 123962368, + "step": 1978 + }, + { + "epoch": 6.582362728785358, + "loss": 0.6665137410163879, + "loss_ce": 7.087117410264909e-05, + "loss_iou": 0.244140625, + "loss_num": 0.035888671875, + "loss_xval": 0.66796875, + "num_input_tokens_seen": 123962368, + "step": 1978 + }, + { + "epoch": 6.585690515806989, + "grad_norm": 15.941285133361816, + "learning_rate": 5e-06, + "loss": 0.4458, + "num_input_tokens_seen": 124024356, + "step": 1979 + }, + { + "epoch": 6.585690515806989, + "loss": 0.5257315635681152, + "loss_ce": 0.0004630337643902749, + "loss_iou": 0.1494140625, + "loss_num": 0.045166015625, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 124024356, + "step": 1979 + }, + { + "epoch": 6.589018302828619, + "grad_norm": 11.377344131469727, + "learning_rate": 5e-06, + "loss": 0.7924, + "num_input_tokens_seen": 124087092, + "step": 1980 + }, + { + "epoch": 6.589018302828619, + "loss": 0.7506057024002075, + "loss_ce": 0.0005446606664918363, + "loss_iou": 0.2265625, + "loss_num": 0.0595703125, + "loss_xval": 0.75, + "num_input_tokens_seen": 124087092, + "step": 1980 + }, + { + "epoch": 6.59234608985025, + "grad_norm": 22.64354133605957, + "learning_rate": 5e-06, + "loss": 0.6216, + "num_input_tokens_seen": 124149004, + "step": 1981 + }, + { + "epoch": 6.59234608985025, + "loss": 0.33752793073654175, + "loss_ce": 3.5264533835288603e-06, + "loss_iou": 0.11279296875, + "loss_num": 0.0224609375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 124149004, + "step": 1981 + }, + { + "epoch": 6.595673876871881, + "grad_norm": 13.62070083618164, + "learning_rate": 5e-06, + "loss": 0.624, + "num_input_tokens_seen": 124212744, + "step": 1982 + }, + { + "epoch": 6.595673876871881, + "loss": 0.7718583345413208, + "loss_ce": 0.0008622486493550241, + "loss_iou": 0.26171875, + "loss_num": 0.04931640625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 124212744, + "step": 1982 + }, + { + "epoch": 6.599001663893511, + "grad_norm": 32.08122253417969, + "learning_rate": 5e-06, + "loss": 0.7776, + "num_input_tokens_seen": 124275488, + "step": 1983 + }, + { + "epoch": 6.599001663893511, + "loss": 0.7189164757728577, + "loss_ce": 0.00028856488643214107, + "loss_iou": 0.1982421875, + "loss_num": 0.064453125, + "loss_xval": 0.71875, + "num_input_tokens_seen": 124275488, + "step": 1983 + }, + { + "epoch": 6.602329450915142, + "grad_norm": 23.14647102355957, + "learning_rate": 5e-06, + "loss": 0.5029, + "num_input_tokens_seen": 124336988, + "step": 1984 + }, + { + "epoch": 6.602329450915142, + "loss": 0.5258368253707886, + "loss_ce": 0.00020208263595122844, + "loss_iou": 0.1767578125, + "loss_num": 0.034423828125, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 124336988, + "step": 1984 + }, + { + "epoch": 6.605657237936772, + "grad_norm": 8.717888832092285, + "learning_rate": 5e-06, + "loss": 0.6094, + "num_input_tokens_seen": 124398312, + "step": 1985 + }, + { + "epoch": 6.605657237936772, + "loss": 0.942054033279419, + "loss_ce": 0.0006478212890215218, + "loss_iou": 0.2890625, + "loss_num": 0.07275390625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 124398312, + "step": 1985 + }, + { + "epoch": 6.608985024958403, + "grad_norm": 11.982193946838379, + "learning_rate": 5e-06, + "loss": 0.4813, + "num_input_tokens_seen": 124461176, + "step": 1986 + }, + { + "epoch": 6.608985024958403, + "loss": 0.5817021727561951, + "loss_ce": 0.0013798931613564491, + "loss_iou": 0.224609375, + "loss_num": 0.0262451171875, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 124461176, + "step": 1986 + }, + { + "epoch": 6.612312811980034, + "grad_norm": 40.03938674926758, + "learning_rate": 5e-06, + "loss": 0.5984, + "num_input_tokens_seen": 124524312, + "step": 1987 + }, + { + "epoch": 6.612312811980034, + "loss": 0.7929661273956299, + "loss_ce": 0.00024154715356417, + "loss_iou": 0.27734375, + "loss_num": 0.04736328125, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 124524312, + "step": 1987 + }, + { + "epoch": 6.615640599001664, + "grad_norm": 20.670000076293945, + "learning_rate": 5e-06, + "loss": 0.7356, + "num_input_tokens_seen": 124586984, + "step": 1988 + }, + { + "epoch": 6.615640599001664, + "loss": 0.534072995185852, + "loss_ce": 0.000625723332632333, + "loss_iou": 0.17578125, + "loss_num": 0.03662109375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 124586984, + "step": 1988 + }, + { + "epoch": 6.618968386023295, + "grad_norm": 13.344552993774414, + "learning_rate": 5e-06, + "loss": 0.7439, + "num_input_tokens_seen": 124648496, + "step": 1989 + }, + { + "epoch": 6.618968386023295, + "loss": 0.8475092053413391, + "loss_ce": 0.0009515842539258301, + "loss_iou": 0.28125, + "loss_num": 0.056640625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 124648496, + "step": 1989 + }, + { + "epoch": 6.6222961730449255, + "grad_norm": 17.5792179107666, + "learning_rate": 5e-06, + "loss": 0.5477, + "num_input_tokens_seen": 124711420, + "step": 1990 + }, + { + "epoch": 6.6222961730449255, + "loss": 0.6316379904747009, + "loss_ce": 4.6201777877286077e-05, + "loss_iou": 0.205078125, + "loss_num": 0.044677734375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 124711420, + "step": 1990 + }, + { + "epoch": 6.625623960066556, + "grad_norm": 11.981757164001465, + "learning_rate": 5e-06, + "loss": 0.5592, + "num_input_tokens_seen": 124774256, + "step": 1991 + }, + { + "epoch": 6.625623960066556, + "loss": 0.4685099422931671, + "loss_ce": 4.079750851815334e-06, + "loss_iou": 0.115234375, + "loss_num": 0.0478515625, + "loss_xval": 0.46875, + "num_input_tokens_seen": 124774256, + "step": 1991 + }, + { + "epoch": 6.628951747088187, + "grad_norm": 19.191776275634766, + "learning_rate": 5e-06, + "loss": 0.751, + "num_input_tokens_seen": 124837888, + "step": 1992 + }, + { + "epoch": 6.628951747088187, + "loss": 0.8945164084434509, + "loss_ce": 0.0010838116286322474, + "loss_iou": 0.34375, + "loss_num": 0.041259765625, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 124837888, + "step": 1992 + }, + { + "epoch": 6.632279534109817, + "grad_norm": 15.3238525390625, + "learning_rate": 5e-06, + "loss": 0.7391, + "num_input_tokens_seen": 124902180, + "step": 1993 + }, + { + "epoch": 6.632279534109817, + "loss": 0.8709877729415894, + "loss_ce": 0.0006264817784540355, + "loss_iou": 0.337890625, + "loss_num": 0.038818359375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 124902180, + "step": 1993 + }, + { + "epoch": 6.635607321131448, + "grad_norm": 26.2684383392334, + "learning_rate": 5e-06, + "loss": 0.6892, + "num_input_tokens_seen": 124964652, + "step": 1994 + }, + { + "epoch": 6.635607321131448, + "loss": 0.5876778364181519, + "loss_ce": 3.134192229481414e-05, + "loss_iou": 0.185546875, + "loss_num": 0.04345703125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 124964652, + "step": 1994 + }, + { + "epoch": 6.6389351081530785, + "grad_norm": 15.246932029724121, + "learning_rate": 5e-06, + "loss": 0.5931, + "num_input_tokens_seen": 125027128, + "step": 1995 + }, + { + "epoch": 6.6389351081530785, + "loss": 0.5009132623672485, + "loss_ce": 0.0004249872581567615, + "loss_iou": 0.1728515625, + "loss_num": 0.031005859375, + "loss_xval": 0.5, + "num_input_tokens_seen": 125027128, + "step": 1995 + }, + { + "epoch": 6.642262895174709, + "grad_norm": 16.900373458862305, + "learning_rate": 5e-06, + "loss": 0.5838, + "num_input_tokens_seen": 125090272, + "step": 1996 + }, + { + "epoch": 6.642262895174709, + "loss": 0.6842046976089478, + "loss_ce": 0.0001226711319759488, + "loss_iou": 0.2265625, + "loss_num": 0.04638671875, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 125090272, + "step": 1996 + }, + { + "epoch": 6.64559068219634, + "grad_norm": 11.600629806518555, + "learning_rate": 5e-06, + "loss": 0.5266, + "num_input_tokens_seen": 125152640, + "step": 1997 + }, + { + "epoch": 6.64559068219634, + "loss": 0.5774567127227783, + "loss_ce": 3.054131411772687e-06, + "loss_iou": 0.2216796875, + "loss_num": 0.0269775390625, + "loss_xval": 0.578125, + "num_input_tokens_seen": 125152640, + "step": 1997 + }, + { + "epoch": 6.64891846921797, + "grad_norm": 20.360536575317383, + "learning_rate": 5e-06, + "loss": 0.5939, + "num_input_tokens_seen": 125213584, + "step": 1998 + }, + { + "epoch": 6.64891846921797, + "loss": 0.7232832908630371, + "loss_ce": 0.0008712068083696067, + "loss_iou": 0.275390625, + "loss_num": 0.0341796875, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 125213584, + "step": 1998 + }, + { + "epoch": 6.652246256239601, + "grad_norm": 8.929659843444824, + "learning_rate": 5e-06, + "loss": 0.5807, + "num_input_tokens_seen": 125276516, + "step": 1999 + }, + { + "epoch": 6.652246256239601, + "loss": 0.4460247755050659, + "loss_ce": 0.00046813851804472506, + "loss_iou": 0.16015625, + "loss_num": 0.025146484375, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 125276516, + "step": 1999 + }, + { + "epoch": 6.655574043261232, + "grad_norm": 14.47555923461914, + "learning_rate": 5e-06, + "loss": 0.6939, + "num_input_tokens_seen": 125338692, + "step": 2000 + }, + { + "epoch": 6.655574043261232, + "eval_seeclick_CIoU": 0.1068805642426014, + "eval_seeclick_GIoU": 0.11891250684857368, + "eval_seeclick_IoU": 0.20469707250595093, + "eval_seeclick_MAE_all": 0.16567593067884445, + "eval_seeclick_MAE_h": 0.05109903775155544, + "eval_seeclick_MAE_w": 0.12995515763759613, + "eval_seeclick_MAE_x_boxes": 0.22886644303798676, + "eval_seeclick_MAE_y_boxes": 0.13595783710479736, + "eval_seeclick_NUM_probability": 0.9999660849571228, + "eval_seeclick_inside_bbox": 0.22500000149011612, + "eval_seeclick_loss": 2.713005304336548, + "eval_seeclick_loss_ce": 0.11975685507059097, + "eval_seeclick_loss_iou": 0.88720703125, + "eval_seeclick_loss_num": 0.1656036376953125, + "eval_seeclick_loss_xval": 2.6025390625, + "eval_seeclick_runtime": 69.2988, + "eval_seeclick_samples_per_second": 0.678, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 125338692, + "step": 2000 + }, + { + "epoch": 6.655574043261232, + "eval_icons_CIoU": 0.03829660825431347, + "eval_icons_GIoU": 0.1595398709177971, + "eval_icons_IoU": 0.19770432263612747, + "eval_icons_MAE_all": 0.16016975790262222, + "eval_icons_MAE_h": 0.0871335044503212, + "eval_icons_MAE_w": 0.15490344911813736, + "eval_icons_MAE_x_boxes": 0.17506500333547592, + "eval_icons_MAE_y_boxes": 0.0526757575571537, + "eval_icons_NUM_probability": 0.9999957382678986, + "eval_icons_inside_bbox": 0.3420138955116272, + "eval_icons_loss": 2.462501287460327, + "eval_icons_loss_ce": 1.5740541243758344e-06, + "eval_icons_loss_iou": 0.8251953125, + "eval_icons_loss_num": 0.1562976837158203, + "eval_icons_loss_xval": 2.430908203125, + "eval_icons_runtime": 71.6265, + "eval_icons_samples_per_second": 0.698, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 125338692, + "step": 2000 + }, + { + "epoch": 6.655574043261232, + "eval_screenspot_CIoU": 0.0693756896071136, + "eval_screenspot_GIoU": 0.13485626379648843, + "eval_screenspot_IoU": 0.21177987257639566, + "eval_screenspot_MAE_all": 0.1937429408232371, + "eval_screenspot_MAE_h": 0.06855036566654842, + "eval_screenspot_MAE_w": 0.1509904464085897, + "eval_screenspot_MAE_x_boxes": 0.2597081462542216, + "eval_screenspot_MAE_y_boxes": 0.1477653905749321, + "eval_screenspot_NUM_probability": 0.9999565482139587, + "eval_screenspot_inside_bbox": 0.37583333253860474, + "eval_screenspot_loss": 2.735307455062866, + "eval_screenspot_loss_ce": 5.104218810932556e-05, + "eval_screenspot_loss_iou": 0.8837890625, + "eval_screenspot_loss_num": 0.20152791341145834, + "eval_screenspot_loss_xval": 2.7766927083333335, + "eval_screenspot_runtime": 131.5463, + "eval_screenspot_samples_per_second": 0.677, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 125338692, + "step": 2000 + }, + { + "epoch": 6.655574043261232, + "eval_compot_CIoU": 0.005251538008451462, + "eval_compot_GIoU": 0.10733498632907867, + "eval_compot_IoU": 0.17231258749961853, + "eval_compot_MAE_all": 0.19772262126207352, + "eval_compot_MAE_h": 0.05648845434188843, + "eval_compot_MAE_w": 0.24372312426567078, + "eval_compot_MAE_x_boxes": 0.20571433007717133, + "eval_compot_MAE_y_boxes": 0.10716105252504349, + "eval_compot_NUM_probability": 0.9999851286411285, + "eval_compot_inside_bbox": 0.3229166716337204, + "eval_compot_loss": 2.8182108402252197, + "eval_compot_loss_ce": 0.0029962222324684262, + "eval_compot_loss_iou": 0.91943359375, + "eval_compot_loss_num": 0.20189666748046875, + "eval_compot_loss_xval": 2.84716796875, + "eval_compot_runtime": 78.1032, + "eval_compot_samples_per_second": 0.64, + "eval_compot_steps_per_second": 0.026, + "num_input_tokens_seen": 125338692, + "step": 2000 + }, + { + "epoch": 6.655574043261232, + "eval_custom_ui_MAE_all": 0.07614851370453835, + "eval_custom_ui_MAE_x": 0.07330591231584549, + "eval_custom_ui_MAE_y": 0.0789911113679409, + "eval_custom_ui_NUM_probability": 0.9999920725822449, + "eval_custom_ui_loss": 0.3671107590198517, + "eval_custom_ui_loss_ce": 1.62217548904664e-06, + "eval_custom_ui_loss_num": 0.0698394775390625, + "eval_custom_ui_loss_xval": 0.34930419921875, + "eval_custom_ui_runtime": 54.0842, + "eval_custom_ui_samples_per_second": 0.924, + "eval_custom_ui_steps_per_second": 0.037, + "num_input_tokens_seen": 125338692, + "step": 2000 + } + ], + "logging_steps": 1.0, + "max_steps": 15000, + "num_input_tokens_seen": 125338692, + "num_train_epochs": 50, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.830993252785848e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}