{"train_lr": 9.999999999999999e-05, "train_min_lr": 5.000000000000001e-06, "train_loss": 0.7508889809027091, "train_loss_scale": 16457.8018018018, "train_weight_decay": 0.009999999999999919, "train_grad_norm": null, "val_score": 61.64062583446503, "epoch": 0, "n_parameters": 676299994}
{"train_lr": 0.00019924995732575055, "train_min_lr": 9.962497866287534e-06, "train_loss": 0.7641944492811913, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 19.400131635121756, "val_score": 59.74609303474426, "epoch": 1, "n_parameters": 676299994}
{"train_lr": 0.00019478136803002203, "train_min_lr": 9.739068401501093e-06, "train_loss": 0.6737550505109735, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 18.90804101611759, "val_score": 60.13671863079071, "epoch": 2, "n_parameters": 676299994}
{"train_lr": 0.00018606148692510566, "train_min_lr": 9.303074346255291e-06, "train_loss": 0.5660648773740362, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 16.76666853306172, "val_score": 61.40625047683716, "epoch": 3, "n_parameters": 676299994}
{"train_lr": 0.0001735275652606548, "train_min_lr": 8.67637826303274e-06, "train_loss": 0.48521199697130796, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 15.521468236997679, "val_score": 61.21093797683716, "epoch": 4, "n_parameters": 676299994}
{"train_lr": 0.0001578081062060238, "train_min_lr": 7.890405310301183e-06, "train_loss": 0.4136771082363508, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 13.443657427578717, "val_score": 60.89843714237213, "epoch": 5, "n_parameters": 676299994}
{"train_lr": 0.00013969134907695633, "train_min_lr": 6.984567453847813e-06, "train_loss": 0.367878481186367, "train_loss_scale": 10947.267267267267, "train_weight_decay": 0.009999999999999919, "train_grad_norm": null, "val_score": 62.05078089237213, "epoch": 6, "n_parameters": 676299994}
{"train_lr": 0.00012008574372489397, "train_min_lr": 6.004287186244696e-06, "train_loss": 0.31006403900571056, "train_loss_scale": 6014.846846846847, "train_weight_decay": 0.009999999999999919, "train_grad_norm": null, "val_score": 60.05859363079071, "epoch": 7, "n_parameters": 676299994}
{"train_lr": 9.997439706806723e-05, "train_min_lr": 4.998719853403364e-06, "train_loss": 0.2987360617777965, "train_loss_scale": 4096.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 13.082661760461939, "val_score": 60.13671851158142, "epoch": 8, "n_parameters": 676299994}
{"train_lr": 8.036577600551951e-05, "train_min_lr": 4.0182888002759745e-06, "train_loss": 0.2522135055358256, "train_loss_scale": 4096.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 11.78250663004838, "val_score": 61.03515577316284, "epoch": 9, "n_parameters": 676299994}
{"train_lr": 6.224313867579823e-05, "train_min_lr": 3.112156933789908e-06, "train_loss": 0.2932859386006991, "train_loss_scale": 4096.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 13.465919440215057, "val_score": 61.484375, "epoch": 10, "n_parameters": 676299994}
{"train_lr": 4.651522978815851e-05, "train_min_lr": 2.325761489407926e-06, "train_loss": 0.25077343647096967, "train_loss_scale": 4096.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 12.112754662115652, "val_score": 60.99609351158142, "epoch": 11, "n_parameters": 676299994}
{"train_lr": 3.3970712368254936e-05, "train_min_lr": 1.698535618412745e-06, "train_loss": 0.23826836703999624, "train_loss_scale": 4096.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 12.375523013753575, "val_score": 61.69921827316284, "epoch": 12, "n_parameters": 676299994}
{"train_lr": 2.5238620900864355e-05, "train_min_lr": 1.2619310450432175e-06, "train_loss": 0.23574608062357605, "train_loss_scale": 6248.552552552553, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 12.982973849809206, "val_score": 60.89843761920929, "epoch": 13, "n_parameters": 676299994}
{"train_lr": 2.0756818914200326e-05, "train_min_lr": 1.037840945710016e-06, "train_loss": 0.23083499201514698, "train_loss_scale": 8192.0, "train_weight_decay": 0.009999999999999919, "train_grad_norm": 11.750323399767145, "val_score": 61.97265541553497, "epoch": 14, "n_parameters": 676299994}