{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.941176470588236e-05, "loss": 0.1482, "step": 10 }, { "epoch": 0.12, "learning_rate": 9.882352941176471e-05, "loss": 0.0992, "step": 20 }, { "epoch": 0.18, "learning_rate": 9.823529411764706e-05, "loss": 0.0313, "step": 30 }, { "epoch": 0.24, "learning_rate": 9.764705882352942e-05, "loss": 0.0365, "step": 40 }, { "epoch": 0.29, "learning_rate": 9.705882352941177e-05, "loss": 0.0326, "step": 50 }, { "epoch": 0.35, "learning_rate": 9.647058823529412e-05, "loss": 0.0122, "step": 60 }, { "epoch": 0.41, "learning_rate": 9.588235294117648e-05, "loss": 0.0045, "step": 70 }, { "epoch": 0.47, "learning_rate": 9.529411764705883e-05, "loss": 0.0044, "step": 80 }, { "epoch": 0.53, "learning_rate": 9.470588235294118e-05, "loss": 0.0016, "step": 90 }, { "epoch": 0.59, "learning_rate": 9.411764705882353e-05, "loss": 0.0036, "step": 100 }, { "epoch": 0.65, "learning_rate": 9.352941176470589e-05, "loss": 0.0158, "step": 110 }, { "epoch": 0.71, "learning_rate": 9.294117647058824e-05, "loss": 0.0019, "step": 120 }, { "epoch": 0.76, "learning_rate": 9.23529411764706e-05, "loss": 0.0132, "step": 130 }, { "epoch": 0.82, "learning_rate": 9.176470588235295e-05, "loss": 0.0025, "step": 140 }, { "epoch": 0.88, "learning_rate": 9.11764705882353e-05, "loss": 0.0049, "step": 150 }, { "epoch": 0.94, "learning_rate": 9.058823529411765e-05, "loss": 0.0181, "step": 160 }, { "epoch": 1.0, "learning_rate": 9e-05, "loss": 0.0153, "step": 170 }, { "epoch": 1.0, "eval_loss": 0.005996049847453833, "eval_mse": 0.005996049847453833, "eval_runtime": 15.3139, "eval_samples_per_second": 4.702, "eval_steps_per_second": 0.588, "step": 170 }, { "epoch": 1.06, "learning_rate": 8.941176470588236e-05, "loss": 0.0079, "step": 180 }, { "epoch": 1.12, "learning_rate": 8.882352941176471e-05, "loss": 0.0238, "step": 190 }, { "epoch": 1.18, "learning_rate": 8.823529411764706e-05, "loss": 0.0274, "step": 200 }, { "epoch": 1.24, "learning_rate": 8.764705882352942e-05, "loss": 0.0078, "step": 210 }, { "epoch": 1.29, "learning_rate": 8.705882352941177e-05, "loss": 0.0144, "step": 220 }, { "epoch": 1.35, "learning_rate": 8.647058823529412e-05, "loss": 0.0046, "step": 230 }, { "epoch": 1.41, "learning_rate": 8.588235294117646e-05, "loss": 0.0078, "step": 240 }, { "epoch": 1.47, "learning_rate": 8.529411764705883e-05, "loss": 0.0012, "step": 250 }, { "epoch": 1.53, "learning_rate": 8.470588235294118e-05, "loss": 0.0124, "step": 260 }, { "epoch": 1.59, "learning_rate": 8.411764705882354e-05, "loss": 0.0105, "step": 270 }, { "epoch": 1.65, "learning_rate": 8.352941176470589e-05, "loss": 0.0089, "step": 280 }, { "epoch": 1.71, "learning_rate": 8.294117647058824e-05, "loss": 0.0017, "step": 290 }, { "epoch": 1.76, "learning_rate": 8.23529411764706e-05, "loss": 0.0095, "step": 300 }, { "epoch": 1.82, "learning_rate": 8.176470588235295e-05, "loss": 0.0016, "step": 310 }, { "epoch": 1.88, "learning_rate": 8.11764705882353e-05, "loss": 0.0128, "step": 320 }, { "epoch": 1.94, "learning_rate": 8.058823529411765e-05, "loss": 0.008, "step": 330 }, { "epoch": 2.0, "learning_rate": 8e-05, "loss": 0.001, "step": 340 }, { "epoch": 2.0, "eval_loss": 0.0011689820094034076, "eval_mse": 0.0011689820094034076, "eval_runtime": 15.7171, "eval_samples_per_second": 4.581, "eval_steps_per_second": 0.573, "step": 340 }, { "epoch": 2.06, "learning_rate": 7.941176470588235e-05, "loss": 0.0005, "step": 350 }, { "epoch": 2.12, "learning_rate": 7.882352941176471e-05, "loss": 0.0007, "step": 360 }, { "epoch": 2.18, "learning_rate": 7.823529411764707e-05, "loss": 0.0183, "step": 370 }, { "epoch": 2.24, "learning_rate": 7.764705882352942e-05, "loss": 0.0021, "step": 380 }, { "epoch": 2.29, "learning_rate": 7.705882352941177e-05, "loss": 0.008, "step": 390 }, { "epoch": 2.35, "learning_rate": 7.647058823529411e-05, "loss": 0.0047, "step": 400 }, { "epoch": 2.41, "learning_rate": 7.588235294117648e-05, "loss": 0.011, "step": 410 }, { "epoch": 2.47, "learning_rate": 7.529411764705883e-05, "loss": 0.0024, "step": 420 }, { "epoch": 2.53, "learning_rate": 7.470588235294118e-05, "loss": 0.0019, "step": 430 }, { "epoch": 2.59, "learning_rate": 7.411764705882354e-05, "loss": 0.0074, "step": 440 }, { "epoch": 2.65, "learning_rate": 7.352941176470589e-05, "loss": 0.0017, "step": 450 }, { "epoch": 2.71, "learning_rate": 7.294117647058823e-05, "loss": 0.0081, "step": 460 }, { "epoch": 2.76, "learning_rate": 7.23529411764706e-05, "loss": 0.0195, "step": 470 }, { "epoch": 2.82, "learning_rate": 7.176470588235295e-05, "loss": 0.0072, "step": 480 }, { "epoch": 2.88, "learning_rate": 7.11764705882353e-05, "loss": 0.0048, "step": 490 }, { "epoch": 2.94, "learning_rate": 7.058823529411765e-05, "loss": 0.0012, "step": 500 }, { "epoch": 3.0, "learning_rate": 7e-05, "loss": 0.0036, "step": 510 }, { "epoch": 3.0, "eval_loss": 0.0050306557677686214, "eval_mse": 0.0050306557677686214, "eval_runtime": 16.8544, "eval_samples_per_second": 4.272, "eval_steps_per_second": 0.534, "step": 510 }, { "epoch": 3.06, "learning_rate": 6.941176470588236e-05, "loss": 0.0055, "step": 520 }, { "epoch": 3.12, "learning_rate": 6.882352941176471e-05, "loss": 0.0112, "step": 530 }, { "epoch": 3.18, "learning_rate": 6.823529411764707e-05, "loss": 0.0038, "step": 540 }, { "epoch": 3.24, "learning_rate": 6.764705882352942e-05, "loss": 0.0017, "step": 550 }, { "epoch": 3.29, "learning_rate": 6.705882352941176e-05, "loss": 0.0015, "step": 560 }, { "epoch": 3.35, "learning_rate": 6.647058823529411e-05, "loss": 0.001, "step": 570 }, { "epoch": 3.41, "learning_rate": 6.588235294117648e-05, "loss": 0.0115, "step": 580 }, { "epoch": 3.47, "learning_rate": 6.529411764705883e-05, "loss": 0.0024, "step": 590 }, { "epoch": 3.53, "learning_rate": 6.470588235294118e-05, "loss": 0.0021, "step": 600 }, { "epoch": 3.59, "learning_rate": 6.411764705882354e-05, "loss": 0.0015, "step": 610 }, { "epoch": 3.65, "learning_rate": 6.352941176470588e-05, "loss": 0.0016, "step": 620 }, { "epoch": 3.71, "learning_rate": 6.294117647058824e-05, "loss": 0.0103, "step": 630 }, { "epoch": 3.76, "learning_rate": 6.23529411764706e-05, "loss": 0.0108, "step": 640 }, { "epoch": 3.82, "learning_rate": 6.176470588235295e-05, "loss": 0.0071, "step": 650 }, { "epoch": 3.88, "learning_rate": 6.11764705882353e-05, "loss": 0.0078, "step": 660 }, { "epoch": 3.94, "learning_rate": 6.058823529411765e-05, "loss": 0.0043, "step": 670 }, { "epoch": 4.0, "learning_rate": 6e-05, "loss": 0.0017, "step": 680 }, { "epoch": 4.0, "eval_loss": 0.0019855075515806675, "eval_mse": 0.001985507318750024, "eval_runtime": 15.5377, "eval_samples_per_second": 4.634, "eval_steps_per_second": 0.579, "step": 680 }, { "epoch": 4.06, "learning_rate": 5.9411764705882355e-05, "loss": 0.0022, "step": 690 }, { "epoch": 4.12, "learning_rate": 5.882352941176471e-05, "loss": 0.0092, "step": 700 }, { "epoch": 4.18, "learning_rate": 5.823529411764707e-05, "loss": 0.0024, "step": 710 }, { "epoch": 4.24, "learning_rate": 5.764705882352941e-05, "loss": 0.002, "step": 720 }, { "epoch": 4.29, "learning_rate": 5.7058823529411766e-05, "loss": 0.0042, "step": 730 }, { "epoch": 4.35, "learning_rate": 5.647058823529412e-05, "loss": 0.0602, "step": 740 }, { "epoch": 4.41, "learning_rate": 5.588235294117647e-05, "loss": 0.0062, "step": 750 }, { "epoch": 4.47, "learning_rate": 5.529411764705883e-05, "loss": 0.003, "step": 760 }, { "epoch": 4.53, "learning_rate": 5.4705882352941185e-05, "loss": 0.0088, "step": 770 }, { "epoch": 4.59, "learning_rate": 5.411764705882353e-05, "loss": 0.0025, "step": 780 }, { "epoch": 4.65, "learning_rate": 5.3529411764705884e-05, "loss": 0.0109, "step": 790 }, { "epoch": 4.71, "learning_rate": 5.294117647058824e-05, "loss": 0.0015, "step": 800 }, { "epoch": 4.76, "learning_rate": 5.235294117647059e-05, "loss": 0.0011, "step": 810 }, { "epoch": 4.82, "learning_rate": 5.176470588235295e-05, "loss": 0.0101, "step": 820 }, { "epoch": 4.88, "learning_rate": 5.117647058823529e-05, "loss": 0.0011, "step": 830 }, { "epoch": 4.94, "learning_rate": 5.058823529411765e-05, "loss": 0.0017, "step": 840 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 0.0014, "step": 850 }, { "epoch": 5.0, "eval_loss": 0.001027416903525591, "eval_mse": 0.001027416903525591, "eval_runtime": 17.3556, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.519, "step": 850 }, { "epoch": 5.06, "learning_rate": 4.9411764705882355e-05, "loss": 0.0023, "step": 860 }, { "epoch": 5.12, "learning_rate": 4.882352941176471e-05, "loss": 0.0005, "step": 870 }, { "epoch": 5.18, "learning_rate": 4.823529411764706e-05, "loss": 0.0053, "step": 880 }, { "epoch": 5.24, "learning_rate": 4.7647058823529414e-05, "loss": 0.0026, "step": 890 }, { "epoch": 5.29, "learning_rate": 4.705882352941177e-05, "loss": 0.0045, "step": 900 }, { "epoch": 5.35, "learning_rate": 4.647058823529412e-05, "loss": 0.0017, "step": 910 }, { "epoch": 5.41, "learning_rate": 4.588235294117647e-05, "loss": 0.0012, "step": 920 }, { "epoch": 5.47, "learning_rate": 4.5294117647058826e-05, "loss": 0.0098, "step": 930 }, { "epoch": 5.53, "learning_rate": 4.470588235294118e-05, "loss": 0.0016, "step": 940 }, { "epoch": 5.59, "learning_rate": 4.411764705882353e-05, "loss": 0.0011, "step": 950 }, { "epoch": 5.65, "learning_rate": 4.3529411764705885e-05, "loss": 0.001, "step": 960 }, { "epoch": 5.71, "learning_rate": 4.294117647058823e-05, "loss": 0.0005, "step": 970 }, { "epoch": 5.76, "learning_rate": 4.235294117647059e-05, "loss": 0.0021, "step": 980 }, { "epoch": 5.82, "learning_rate": 4.1764705882352944e-05, "loss": 0.0107, "step": 990 }, { "epoch": 5.88, "learning_rate": 4.11764705882353e-05, "loss": 0.0086, "step": 1000 }, { "epoch": 5.94, "learning_rate": 4.058823529411765e-05, "loss": 0.0014, "step": 1010 }, { "epoch": 6.0, "learning_rate": 4e-05, "loss": 0.0008, "step": 1020 }, { "epoch": 6.0, "eval_loss": 0.001083881827071309, "eval_mse": 0.001083881827071309, "eval_runtime": 15.3262, "eval_samples_per_second": 4.698, "eval_steps_per_second": 0.587, "step": 1020 }, { "epoch": 6.06, "learning_rate": 3.9411764705882356e-05, "loss": 0.0012, "step": 1030 }, { "epoch": 6.12, "learning_rate": 3.882352941176471e-05, "loss": 0.002, "step": 1040 }, { "epoch": 6.18, "learning_rate": 3.8235294117647055e-05, "loss": 0.0062, "step": 1050 }, { "epoch": 6.24, "learning_rate": 3.7647058823529415e-05, "loss": 0.0017, "step": 1060 }, { "epoch": 6.29, "learning_rate": 3.705882352941177e-05, "loss": 0.0008, "step": 1070 }, { "epoch": 6.35, "learning_rate": 3.6470588235294114e-05, "loss": 0.0013, "step": 1080 }, { "epoch": 6.41, "learning_rate": 3.5882352941176474e-05, "loss": 0.0078, "step": 1090 }, { "epoch": 6.47, "learning_rate": 3.529411764705883e-05, "loss": 0.0026, "step": 1100 }, { "epoch": 6.53, "learning_rate": 3.470588235294118e-05, "loss": 0.0084, "step": 1110 }, { "epoch": 6.59, "learning_rate": 3.411764705882353e-05, "loss": 0.0016, "step": 1120 }, { "epoch": 6.65, "learning_rate": 3.352941176470588e-05, "loss": 0.0026, "step": 1130 }, { "epoch": 6.71, "learning_rate": 3.294117647058824e-05, "loss": 0.0053, "step": 1140 }, { "epoch": 6.76, "learning_rate": 3.235294117647059e-05, "loss": 0.0058, "step": 1150 }, { "epoch": 6.82, "learning_rate": 3.176470588235294e-05, "loss": 0.001, "step": 1160 }, { "epoch": 6.88, "learning_rate": 3.11764705882353e-05, "loss": 0.0011, "step": 1170 }, { "epoch": 6.94, "learning_rate": 3.058823529411765e-05, "loss": 0.001, "step": 1180 }, { "epoch": 7.0, "learning_rate": 3e-05, "loss": 0.0008, "step": 1190 }, { "epoch": 7.0, "eval_loss": 0.000933311355765909, "eval_mse": 0.000933311355765909, "eval_runtime": 15.2402, "eval_samples_per_second": 4.724, "eval_steps_per_second": 0.591, "step": 1190 }, { "epoch": 7.06, "learning_rate": 2.9411764705882354e-05, "loss": 0.0152, "step": 1200 }, { "epoch": 7.12, "learning_rate": 2.8823529411764703e-05, "loss": 0.0025, "step": 1210 }, { "epoch": 7.18, "learning_rate": 2.823529411764706e-05, "loss": 0.0018, "step": 1220 }, { "epoch": 7.24, "learning_rate": 2.7647058823529416e-05, "loss": 0.0011, "step": 1230 }, { "epoch": 7.29, "learning_rate": 2.7058823529411766e-05, "loss": 0.0076, "step": 1240 }, { "epoch": 7.35, "learning_rate": 2.647058823529412e-05, "loss": 0.0009, "step": 1250 }, { "epoch": 7.41, "learning_rate": 2.5882352941176475e-05, "loss": 0.0054, "step": 1260 }, { "epoch": 7.47, "learning_rate": 2.5294117647058825e-05, "loss": 0.0012, "step": 1270 }, { "epoch": 7.53, "learning_rate": 2.4705882352941178e-05, "loss": 0.0012, "step": 1280 }, { "epoch": 7.59, "learning_rate": 2.411764705882353e-05, "loss": 0.0005, "step": 1290 }, { "epoch": 7.65, "learning_rate": 2.3529411764705884e-05, "loss": 0.0011, "step": 1300 }, { "epoch": 7.71, "learning_rate": 2.2941176470588237e-05, "loss": 0.0022, "step": 1310 }, { "epoch": 7.76, "learning_rate": 2.235294117647059e-05, "loss": 0.0009, "step": 1320 }, { "epoch": 7.82, "learning_rate": 2.1764705882352943e-05, "loss": 0.0007, "step": 1330 }, { "epoch": 7.88, "learning_rate": 2.1176470588235296e-05, "loss": 0.0007, "step": 1340 }, { "epoch": 7.94, "learning_rate": 2.058823529411765e-05, "loss": 0.0007, "step": 1350 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 0.0008, "step": 1360 }, { "epoch": 8.0, "eval_loss": 0.001160233630798757, "eval_mse": 0.001160233747214079, "eval_runtime": 18.2978, "eval_samples_per_second": 3.935, "eval_steps_per_second": 0.492, "step": 1360 }, { "epoch": 8.06, "learning_rate": 1.9411764705882355e-05, "loss": 0.0072, "step": 1370 }, { "epoch": 8.12, "learning_rate": 1.8823529411764708e-05, "loss": 0.001, "step": 1380 }, { "epoch": 8.18, "learning_rate": 1.8235294117647057e-05, "loss": 0.0008, "step": 1390 }, { "epoch": 8.24, "learning_rate": 1.7647058823529414e-05, "loss": 0.0025, "step": 1400 }, { "epoch": 8.29, "learning_rate": 1.7058823529411767e-05, "loss": 0.001, "step": 1410 }, { "epoch": 8.35, "learning_rate": 1.647058823529412e-05, "loss": 0.0014, "step": 1420 }, { "epoch": 8.41, "learning_rate": 1.588235294117647e-05, "loss": 0.0015, "step": 1430 }, { "epoch": 8.47, "learning_rate": 1.5294117647058826e-05, "loss": 0.0041, "step": 1440 }, { "epoch": 8.53, "learning_rate": 1.4705882352941177e-05, "loss": 0.0012, "step": 1450 }, { "epoch": 8.59, "learning_rate": 1.411764705882353e-05, "loss": 0.0009, "step": 1460 }, { "epoch": 8.65, "learning_rate": 1.3529411764705883e-05, "loss": 0.0012, "step": 1470 }, { "epoch": 8.71, "learning_rate": 1.2941176470588238e-05, "loss": 0.0022, "step": 1480 }, { "epoch": 8.76, "learning_rate": 1.2352941176470589e-05, "loss": 0.0007, "step": 1490 }, { "epoch": 8.82, "learning_rate": 1.1764705882352942e-05, "loss": 0.0064, "step": 1500 }, { "epoch": 8.88, "learning_rate": 1.1176470588235295e-05, "loss": 0.0014, "step": 1510 }, { "epoch": 8.94, "learning_rate": 1.0588235294117648e-05, "loss": 0.0006, "step": 1520 }, { "epoch": 9.0, "learning_rate": 1e-05, "loss": 0.001, "step": 1530 }, { "epoch": 9.0, "eval_loss": 0.0007979701040312648, "eval_mse": 0.0007979701040312648, "eval_runtime": 17.0378, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.528, "step": 1530 }, { "epoch": 9.06, "learning_rate": 9.411764705882354e-06, "loss": 0.0009, "step": 1540 }, { "epoch": 9.12, "learning_rate": 8.823529411764707e-06, "loss": 0.0008, "step": 1550 }, { "epoch": 9.18, "learning_rate": 8.23529411764706e-06, "loss": 0.0021, "step": 1560 }, { "epoch": 9.24, "learning_rate": 7.647058823529413e-06, "loss": 0.0009, "step": 1570 }, { "epoch": 9.29, "learning_rate": 7.058823529411765e-06, "loss": 0.0006, "step": 1580 }, { "epoch": 9.35, "learning_rate": 6.470588235294119e-06, "loss": 0.0005, "step": 1590 }, { "epoch": 9.41, "learning_rate": 5.882352941176471e-06, "loss": 0.0005, "step": 1600 }, { "epoch": 9.47, "learning_rate": 5.294117647058824e-06, "loss": 0.0007, "step": 1610 }, { "epoch": 9.53, "learning_rate": 4.705882352941177e-06, "loss": 0.0033, "step": 1620 }, { "epoch": 9.59, "learning_rate": 4.11764705882353e-06, "loss": 0.0009, "step": 1630 }, { "epoch": 9.65, "learning_rate": 3.5294117647058825e-06, "loss": 0.0008, "step": 1640 }, { "epoch": 9.71, "learning_rate": 2.9411764705882355e-06, "loss": 0.0007, "step": 1650 }, { "epoch": 9.76, "learning_rate": 2.3529411764705885e-06, "loss": 0.002, "step": 1660 }, { "epoch": 9.82, "learning_rate": 1.7647058823529412e-06, "loss": 0.0009, "step": 1670 }, { "epoch": 9.88, "learning_rate": 1.1764705882352942e-06, "loss": 0.0008, "step": 1680 }, { "epoch": 9.94, "learning_rate": 5.882352941176471e-07, "loss": 0.0063, "step": 1690 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0004, "step": 1700 } ], "max_steps": 1700, "num_train_epochs": 10, "total_flos": 0.0, "trial_name": null, "trial_params": null }