{ "best_metric": 0.15960238873958588, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.11961722488038277, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011961722488038277, "grad_norm": 1.9494132995605469, "learning_rate": 1e-05, "loss": 0.9221, "step": 1 }, { "epoch": 0.0011961722488038277, "eval_loss": 1.7471424341201782, "eval_runtime": 35.3181, "eval_samples_per_second": 19.933, "eval_steps_per_second": 4.983, "step": 1 }, { "epoch": 0.0023923444976076554, "grad_norm": 2.3642637729644775, "learning_rate": 2e-05, "loss": 1.1185, "step": 2 }, { "epoch": 0.0035885167464114833, "grad_norm": 2.282869815826416, "learning_rate": 3e-05, "loss": 1.1527, "step": 3 }, { "epoch": 0.004784688995215311, "grad_norm": 2.2901337146759033, "learning_rate": 4e-05, "loss": 1.1557, "step": 4 }, { "epoch": 0.005980861244019139, "grad_norm": 1.7862334251403809, "learning_rate": 5e-05, "loss": 1.0143, "step": 5 }, { "epoch": 0.007177033492822967, "grad_norm": 1.8213731050491333, "learning_rate": 6e-05, "loss": 1.0051, "step": 6 }, { "epoch": 0.008373205741626795, "grad_norm": 1.5596104860305786, "learning_rate": 7e-05, "loss": 0.831, "step": 7 }, { "epoch": 0.009569377990430622, "grad_norm": 1.7667557001113892, "learning_rate": 8e-05, "loss": 0.8318, "step": 8 }, { "epoch": 0.01076555023923445, "grad_norm": 1.9082083702087402, "learning_rate": 9e-05, "loss": 0.7814, "step": 9 }, { "epoch": 0.011961722488038277, "grad_norm": 1.699601411819458, "learning_rate": 0.0001, "loss": 0.7049, "step": 10 }, { "epoch": 0.013157894736842105, "grad_norm": 1.7066322565078735, "learning_rate": 9.99695413509548e-05, "loss": 0.5715, "step": 11 }, { "epoch": 0.014354066985645933, "grad_norm": 1.726962924003601, "learning_rate": 9.987820251299122e-05, "loss": 0.464, "step": 12 }, { "epoch": 0.01555023923444976, "grad_norm": 1.3154484033584595, "learning_rate": 9.972609476841367e-05, "loss": 0.4189, "step": 13 }, { "epoch": 0.01674641148325359, "grad_norm": 1.44478440284729, "learning_rate": 9.951340343707852e-05, "loss": 0.4308, "step": 14 }, { "epoch": 0.017942583732057416, "grad_norm": 1.3897327184677124, "learning_rate": 9.924038765061042e-05, "loss": 0.3837, "step": 15 }, { "epoch": 0.019138755980861243, "grad_norm": 1.588050127029419, "learning_rate": 9.890738003669029e-05, "loss": 0.3804, "step": 16 }, { "epoch": 0.02033492822966507, "grad_norm": 1.2375253438949585, "learning_rate": 9.851478631379982e-05, "loss": 0.3513, "step": 17 }, { "epoch": 0.0215311004784689, "grad_norm": 1.5607556104660034, "learning_rate": 9.806308479691595e-05, "loss": 0.382, "step": 18 }, { "epoch": 0.022727272727272728, "grad_norm": 1.2236876487731934, "learning_rate": 9.755282581475769e-05, "loss": 0.3218, "step": 19 }, { "epoch": 0.023923444976076555, "grad_norm": 1.2549458742141724, "learning_rate": 9.698463103929542e-05, "loss": 0.3437, "step": 20 }, { "epoch": 0.025119617224880382, "grad_norm": 1.242065668106079, "learning_rate": 9.635919272833938e-05, "loss": 0.3025, "step": 21 }, { "epoch": 0.02631578947368421, "grad_norm": 1.5587633848190308, "learning_rate": 9.567727288213005e-05, "loss": 0.3169, "step": 22 }, { "epoch": 0.02751196172248804, "grad_norm": 1.2049616575241089, "learning_rate": 9.493970231495835e-05, "loss": 0.2571, "step": 23 }, { "epoch": 0.028708133971291867, "grad_norm": 1.2683987617492676, "learning_rate": 9.414737964294636e-05, "loss": 0.2468, "step": 24 }, { "epoch": 0.029904306220095694, "grad_norm": 1.271965503692627, "learning_rate": 9.330127018922194e-05, "loss": 0.2941, "step": 25 }, { "epoch": 0.03110047846889952, "grad_norm": 1.4347857236862183, "learning_rate": 9.24024048078213e-05, "loss": 0.2639, "step": 26 }, { "epoch": 0.03229665071770335, "grad_norm": 1.281456708908081, "learning_rate": 9.145187862775209e-05, "loss": 0.2307, "step": 27 }, { "epoch": 0.03349282296650718, "grad_norm": 1.2285360097885132, "learning_rate": 9.045084971874738e-05, "loss": 0.2135, "step": 28 }, { "epoch": 0.034688995215311005, "grad_norm": 1.1313438415527344, "learning_rate": 8.940053768033609e-05, "loss": 0.2001, "step": 29 }, { "epoch": 0.03588516746411483, "grad_norm": 1.2885600328445435, "learning_rate": 8.83022221559489e-05, "loss": 0.2083, "step": 30 }, { "epoch": 0.03708133971291866, "grad_norm": 1.4596316814422607, "learning_rate": 8.715724127386972e-05, "loss": 0.2443, "step": 31 }, { "epoch": 0.03827751196172249, "grad_norm": 0.982135534286499, "learning_rate": 8.596699001693255e-05, "loss": 0.1612, "step": 32 }, { "epoch": 0.039473684210526314, "grad_norm": 1.1624139547348022, "learning_rate": 8.473291852294987e-05, "loss": 0.1982, "step": 33 }, { "epoch": 0.04066985645933014, "grad_norm": 1.4763832092285156, "learning_rate": 8.345653031794292e-05, "loss": 0.1981, "step": 34 }, { "epoch": 0.041866028708133975, "grad_norm": 1.56986403465271, "learning_rate": 8.213938048432697e-05, "loss": 0.1967, "step": 35 }, { "epoch": 0.0430622009569378, "grad_norm": 1.4756582975387573, "learning_rate": 8.07830737662829e-05, "loss": 0.2175, "step": 36 }, { "epoch": 0.04425837320574163, "grad_norm": 1.4440913200378418, "learning_rate": 7.938926261462366e-05, "loss": 0.2053, "step": 37 }, { "epoch": 0.045454545454545456, "grad_norm": 1.2837365865707397, "learning_rate": 7.795964517353735e-05, "loss": 0.2031, "step": 38 }, { "epoch": 0.04665071770334928, "grad_norm": 1.421876311302185, "learning_rate": 7.649596321166024e-05, "loss": 0.2062, "step": 39 }, { "epoch": 0.04784688995215311, "grad_norm": 1.4365227222442627, "learning_rate": 7.500000000000001e-05, "loss": 0.1868, "step": 40 }, { "epoch": 0.04904306220095694, "grad_norm": 1.5663189888000488, "learning_rate": 7.347357813929454e-05, "loss": 0.2521, "step": 41 }, { "epoch": 0.050239234449760764, "grad_norm": 1.4127620458602905, "learning_rate": 7.191855733945387e-05, "loss": 0.1688, "step": 42 }, { "epoch": 0.05143540669856459, "grad_norm": 1.5866608619689941, "learning_rate": 7.033683215379002e-05, "loss": 0.224, "step": 43 }, { "epoch": 0.05263157894736842, "grad_norm": 2.2985501289367676, "learning_rate": 6.873032967079561e-05, "loss": 0.3267, "step": 44 }, { "epoch": 0.05382775119617225, "grad_norm": 2.202700138092041, "learning_rate": 6.710100716628344e-05, "loss": 0.2247, "step": 45 }, { "epoch": 0.05502392344497608, "grad_norm": 1.6158735752105713, "learning_rate": 6.545084971874738e-05, "loss": 0.1816, "step": 46 }, { "epoch": 0.056220095693779906, "grad_norm": 2.0496480464935303, "learning_rate": 6.378186779084995e-05, "loss": 0.141, "step": 47 }, { "epoch": 0.05741626794258373, "grad_norm": 2.518812894821167, "learning_rate": 6.209609477998338e-05, "loss": 0.1988, "step": 48 }, { "epoch": 0.05861244019138756, "grad_norm": 3.8230884075164795, "learning_rate": 6.0395584540887963e-05, "loss": 0.3154, "step": 49 }, { "epoch": 0.05980861244019139, "grad_norm": 3.400611400604248, "learning_rate": 5.868240888334653e-05, "loss": 0.2167, "step": 50 }, { "epoch": 0.05980861244019139, "eval_loss": 0.19923071563243866, "eval_runtime": 35.3983, "eval_samples_per_second": 19.888, "eval_steps_per_second": 4.972, "step": 50 }, { "epoch": 0.061004784688995214, "grad_norm": 0.7308669090270996, "learning_rate": 5.695865504800327e-05, "loss": 0.1903, "step": 51 }, { "epoch": 0.06220095693779904, "grad_norm": 0.8045475482940674, "learning_rate": 5.522642316338268e-05, "loss": 0.2067, "step": 52 }, { "epoch": 0.06339712918660287, "grad_norm": 0.8171185851097107, "learning_rate": 5.348782368720626e-05, "loss": 0.2032, "step": 53 }, { "epoch": 0.0645933014354067, "grad_norm": 0.6744611859321594, "learning_rate": 5.174497483512506e-05, "loss": 0.1581, "step": 54 }, { "epoch": 0.06578947368421052, "grad_norm": 0.7448108792304993, "learning_rate": 5e-05, "loss": 0.1815, "step": 55 }, { "epoch": 0.06698564593301436, "grad_norm": 0.7760705351829529, "learning_rate": 4.825502516487497e-05, "loss": 0.1738, "step": 56 }, { "epoch": 0.06818181818181818, "grad_norm": 0.8528043031692505, "learning_rate": 4.6512176312793736e-05, "loss": 0.1841, "step": 57 }, { "epoch": 0.06937799043062201, "grad_norm": 0.905849277973175, "learning_rate": 4.477357683661734e-05, "loss": 0.1738, "step": 58 }, { "epoch": 0.07057416267942583, "grad_norm": 0.8059756755828857, "learning_rate": 4.3041344951996746e-05, "loss": 0.1722, "step": 59 }, { "epoch": 0.07177033492822966, "grad_norm": 0.8063069581985474, "learning_rate": 4.131759111665349e-05, "loss": 0.1927, "step": 60 }, { "epoch": 0.0729665071770335, "grad_norm": 0.825333833694458, "learning_rate": 3.960441545911204e-05, "loss": 0.1445, "step": 61 }, { "epoch": 0.07416267942583732, "grad_norm": 0.8422712087631226, "learning_rate": 3.790390522001662e-05, "loss": 0.1558, "step": 62 }, { "epoch": 0.07535885167464115, "grad_norm": 0.9840994477272034, "learning_rate": 3.6218132209150045e-05, "loss": 0.1733, "step": 63 }, { "epoch": 0.07655502392344497, "grad_norm": 0.8723501563072205, "learning_rate": 3.4549150281252636e-05, "loss": 0.1491, "step": 64 }, { "epoch": 0.07775119617224881, "grad_norm": 0.8056702017784119, "learning_rate": 3.289899283371657e-05, "loss": 0.1573, "step": 65 }, { "epoch": 0.07894736842105263, "grad_norm": 0.8925252556800842, "learning_rate": 3.12696703292044e-05, "loss": 0.1795, "step": 66 }, { "epoch": 0.08014354066985646, "grad_norm": 0.9729580283164978, "learning_rate": 2.9663167846209998e-05, "loss": 0.1791, "step": 67 }, { "epoch": 0.08133971291866028, "grad_norm": 1.0415399074554443, "learning_rate": 2.8081442660546125e-05, "loss": 0.177, "step": 68 }, { "epoch": 0.08253588516746412, "grad_norm": 0.8020843863487244, "learning_rate": 2.6526421860705473e-05, "loss": 0.1268, "step": 69 }, { "epoch": 0.08373205741626795, "grad_norm": 0.8391036987304688, "learning_rate": 2.500000000000001e-05, "loss": 0.1344, "step": 70 }, { "epoch": 0.08492822966507177, "grad_norm": 1.1039748191833496, "learning_rate": 2.350403678833976e-05, "loss": 0.1954, "step": 71 }, { "epoch": 0.0861244019138756, "grad_norm": 0.9647433757781982, "learning_rate": 2.2040354826462668e-05, "loss": 0.1759, "step": 72 }, { "epoch": 0.08732057416267942, "grad_norm": 0.9805471301078796, "learning_rate": 2.061073738537635e-05, "loss": 0.1625, "step": 73 }, { "epoch": 0.08851674641148326, "grad_norm": 0.9431953430175781, "learning_rate": 1.9216926233717085e-05, "loss": 0.1464, "step": 74 }, { "epoch": 0.08971291866028708, "grad_norm": 0.9271813631057739, "learning_rate": 1.7860619515673033e-05, "loss": 0.1374, "step": 75 }, { "epoch": 0.09090909090909091, "grad_norm": 1.0403175354003906, "learning_rate": 1.6543469682057106e-05, "loss": 0.1637, "step": 76 }, { "epoch": 0.09210526315789473, "grad_norm": 0.9993944764137268, "learning_rate": 1.526708147705013e-05, "loss": 0.1522, "step": 77 }, { "epoch": 0.09330143540669857, "grad_norm": 1.0364563465118408, "learning_rate": 1.4033009983067452e-05, "loss": 0.1471, "step": 78 }, { "epoch": 0.09449760765550239, "grad_norm": 0.8725647926330566, "learning_rate": 1.2842758726130283e-05, "loss": 0.1113, "step": 79 }, { "epoch": 0.09569377990430622, "grad_norm": 0.9386890530586243, "learning_rate": 1.1697777844051105e-05, "loss": 0.1104, "step": 80 }, { "epoch": 0.09688995215311005, "grad_norm": 1.0741575956344604, "learning_rate": 1.0599462319663905e-05, "loss": 0.1286, "step": 81 }, { "epoch": 0.09808612440191387, "grad_norm": 1.2276641130447388, "learning_rate": 9.549150281252633e-06, "loss": 0.1517, "step": 82 }, { "epoch": 0.09928229665071771, "grad_norm": 1.12937593460083, "learning_rate": 8.548121372247918e-06, "loss": 0.1482, "step": 83 }, { "epoch": 0.10047846889952153, "grad_norm": 1.1188244819641113, "learning_rate": 7.597595192178702e-06, "loss": 0.1201, "step": 84 }, { "epoch": 0.10167464114832536, "grad_norm": 1.406683325767517, "learning_rate": 6.698729810778065e-06, "loss": 0.2082, "step": 85 }, { "epoch": 0.10287081339712918, "grad_norm": 1.214578628540039, "learning_rate": 5.852620357053651e-06, "loss": 0.186, "step": 86 }, { "epoch": 0.10406698564593302, "grad_norm": 1.270246148109436, "learning_rate": 5.060297685041659e-06, "loss": 0.1308, "step": 87 }, { "epoch": 0.10526315789473684, "grad_norm": 1.2750657796859741, "learning_rate": 4.322727117869951e-06, "loss": 0.1296, "step": 88 }, { "epoch": 0.10645933014354067, "grad_norm": 1.4091739654541016, "learning_rate": 3.6408072716606346e-06, "loss": 0.1985, "step": 89 }, { "epoch": 0.1076555023923445, "grad_norm": 1.3769253492355347, "learning_rate": 3.0153689607045845e-06, "loss": 0.1777, "step": 90 }, { "epoch": 0.10885167464114832, "grad_norm": 1.5859569311141968, "learning_rate": 2.4471741852423237e-06, "loss": 0.1514, "step": 91 }, { "epoch": 0.11004784688995216, "grad_norm": 1.3966237306594849, "learning_rate": 1.9369152030840556e-06, "loss": 0.1825, "step": 92 }, { "epoch": 0.11124401913875598, "grad_norm": 1.6977550983428955, "learning_rate": 1.4852136862001764e-06, "loss": 0.1799, "step": 93 }, { "epoch": 0.11244019138755981, "grad_norm": 1.6813634634017944, "learning_rate": 1.0926199633097157e-06, "loss": 0.1539, "step": 94 }, { "epoch": 0.11363636363636363, "grad_norm": 2.3753628730773926, "learning_rate": 7.596123493895991e-07, "loss": 0.2349, "step": 95 }, { "epoch": 0.11483253588516747, "grad_norm": 2.1884944438934326, "learning_rate": 4.865965629214819e-07, "loss": 0.2109, "step": 96 }, { "epoch": 0.11602870813397129, "grad_norm": 1.6471924781799316, "learning_rate": 2.7390523158633554e-07, "loss": 0.1618, "step": 97 }, { "epoch": 0.11722488038277512, "grad_norm": 1.5181317329406738, "learning_rate": 1.2179748700879012e-07, "loss": 0.102, "step": 98 }, { "epoch": 0.11842105263157894, "grad_norm": 5.536009788513184, "learning_rate": 3.04586490452119e-08, "loss": 0.5176, "step": 99 }, { "epoch": 0.11961722488038277, "grad_norm": 5.12874174118042, "learning_rate": 0.0, "loss": 0.2482, "step": 100 }, { "epoch": 0.11961722488038277, "eval_loss": 0.15960238873958588, "eval_runtime": 35.4137, "eval_samples_per_second": 19.879, "eval_steps_per_second": 4.97, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.015623072500941e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }