roberta-base-ckb / trainer_state.json
razhan's picture
End of training
1bb5e5b
raw
history blame
28.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 22854,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.978121991773869e-05,
"loss": 6.7838,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.956243983547738e-05,
"loss": 6.1059,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 4.9343659753216074e-05,
"loss": 5.9324,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 4.912487967095476e-05,
"loss": 5.7856,
"step": 400
},
{
"epoch": 0.07,
"learning_rate": 4.890609958869345e-05,
"loss": 5.7135,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 4.868731950643214e-05,
"loss": 5.6541,
"step": 600
},
{
"epoch": 0.09,
"learning_rate": 4.846853942417083e-05,
"loss": 5.5545,
"step": 700
},
{
"epoch": 0.11,
"learning_rate": 4.824975934190952e-05,
"loss": 5.5722,
"step": 800
},
{
"epoch": 0.12,
"learning_rate": 4.803097925964821e-05,
"loss": 5.4627,
"step": 900
},
{
"epoch": 0.13,
"learning_rate": 4.7812199177386893e-05,
"loss": 5.4158,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 4.759341909512558e-05,
"loss": 5.3269,
"step": 1100
},
{
"epoch": 0.16,
"learning_rate": 4.737463901286427e-05,
"loss": 5.3289,
"step": 1200
},
{
"epoch": 0.17,
"learning_rate": 4.715585893060296e-05,
"loss": 5.3057,
"step": 1300
},
{
"epoch": 0.18,
"learning_rate": 4.693707884834165e-05,
"loss": 5.2426,
"step": 1400
},
{
"epoch": 0.2,
"learning_rate": 4.671829876608034e-05,
"loss": 5.1834,
"step": 1500
},
{
"epoch": 0.21,
"learning_rate": 4.649951868381903e-05,
"loss": 5.1129,
"step": 1600
},
{
"epoch": 0.22,
"learning_rate": 4.628073860155771e-05,
"loss": 5.1133,
"step": 1700
},
{
"epoch": 0.24,
"learning_rate": 4.6061958519296404e-05,
"loss": 4.9882,
"step": 1800
},
{
"epoch": 0.25,
"learning_rate": 4.5843178437035094e-05,
"loss": 5.0315,
"step": 1900
},
{
"epoch": 0.26,
"learning_rate": 4.5624398354773784e-05,
"loss": 4.9776,
"step": 2000
},
{
"epoch": 0.28,
"learning_rate": 4.5405618272512475e-05,
"loss": 4.9737,
"step": 2100
},
{
"epoch": 0.29,
"learning_rate": 4.5186838190251165e-05,
"loss": 4.8419,
"step": 2200
},
{
"epoch": 0.3,
"learning_rate": 4.496805810798985e-05,
"loss": 4.8256,
"step": 2300
},
{
"epoch": 0.32,
"learning_rate": 4.474927802572854e-05,
"loss": 4.8925,
"step": 2400
},
{
"epoch": 0.33,
"learning_rate": 4.453049794346723e-05,
"loss": 4.7332,
"step": 2500
},
{
"epoch": 0.34,
"learning_rate": 4.431171786120592e-05,
"loss": 4.7318,
"step": 2600
},
{
"epoch": 0.35,
"learning_rate": 4.409293777894461e-05,
"loss": 4.69,
"step": 2700
},
{
"epoch": 0.37,
"learning_rate": 4.3874157696683295e-05,
"loss": 4.6975,
"step": 2800
},
{
"epoch": 0.38,
"learning_rate": 4.3655377614421985e-05,
"loss": 4.7073,
"step": 2900
},
{
"epoch": 0.39,
"learning_rate": 4.3436597532160675e-05,
"loss": 4.5963,
"step": 3000
},
{
"epoch": 0.41,
"learning_rate": 4.321781744989936e-05,
"loss": 4.5638,
"step": 3100
},
{
"epoch": 0.42,
"learning_rate": 4.299903736763805e-05,
"loss": 4.5798,
"step": 3200
},
{
"epoch": 0.43,
"learning_rate": 4.278025728537674e-05,
"loss": 4.4876,
"step": 3300
},
{
"epoch": 0.45,
"learning_rate": 4.256147720311543e-05,
"loss": 4.5954,
"step": 3400
},
{
"epoch": 0.46,
"learning_rate": 4.234269712085412e-05,
"loss": 4.4998,
"step": 3500
},
{
"epoch": 0.47,
"learning_rate": 4.212391703859281e-05,
"loss": 4.495,
"step": 3600
},
{
"epoch": 0.49,
"learning_rate": 4.1905136956331495e-05,
"loss": 4.537,
"step": 3700
},
{
"epoch": 0.5,
"learning_rate": 4.1686356874070186e-05,
"loss": 4.4669,
"step": 3800
},
{
"epoch": 0.51,
"learning_rate": 4.1467576791808876e-05,
"loss": 4.5145,
"step": 3900
},
{
"epoch": 0.53,
"learning_rate": 4.1248796709547566e-05,
"loss": 4.3922,
"step": 4000
},
{
"epoch": 0.54,
"learning_rate": 4.103001662728626e-05,
"loss": 4.4273,
"step": 4100
},
{
"epoch": 0.55,
"learning_rate": 4.081123654502495e-05,
"loss": 4.3919,
"step": 4200
},
{
"epoch": 0.56,
"learning_rate": 4.059245646276363e-05,
"loss": 4.3121,
"step": 4300
},
{
"epoch": 0.58,
"learning_rate": 4.037367638050232e-05,
"loss": 4.419,
"step": 4400
},
{
"epoch": 0.59,
"learning_rate": 4.015489629824101e-05,
"loss": 4.3461,
"step": 4500
},
{
"epoch": 0.6,
"learning_rate": 3.9936116215979696e-05,
"loss": 4.2377,
"step": 4600
},
{
"epoch": 0.62,
"learning_rate": 3.9717336133718386e-05,
"loss": 4.2837,
"step": 4700
},
{
"epoch": 0.63,
"learning_rate": 3.9498556051457077e-05,
"loss": 4.2739,
"step": 4800
},
{
"epoch": 0.64,
"learning_rate": 3.927977596919577e-05,
"loss": 4.2504,
"step": 4900
},
{
"epoch": 0.66,
"learning_rate": 3.906099588693445e-05,
"loss": 4.2794,
"step": 5000
},
{
"epoch": 0.67,
"learning_rate": 3.884221580467314e-05,
"loss": 4.2125,
"step": 5100
},
{
"epoch": 0.68,
"learning_rate": 3.862343572241183e-05,
"loss": 4.2143,
"step": 5200
},
{
"epoch": 0.7,
"learning_rate": 3.840465564015052e-05,
"loss": 4.1628,
"step": 5300
},
{
"epoch": 0.71,
"learning_rate": 3.818587555788921e-05,
"loss": 4.1445,
"step": 5400
},
{
"epoch": 0.72,
"learning_rate": 3.79670954756279e-05,
"loss": 4.237,
"step": 5500
},
{
"epoch": 0.74,
"learning_rate": 3.774831539336659e-05,
"loss": 4.2284,
"step": 5600
},
{
"epoch": 0.75,
"learning_rate": 3.752953531110528e-05,
"loss": 4.1451,
"step": 5700
},
{
"epoch": 0.76,
"learning_rate": 3.731075522884397e-05,
"loss": 4.1644,
"step": 5800
},
{
"epoch": 0.77,
"learning_rate": 3.709197514658266e-05,
"loss": 4.1351,
"step": 5900
},
{
"epoch": 0.79,
"learning_rate": 3.687319506432135e-05,
"loss": 4.1229,
"step": 6000
},
{
"epoch": 0.8,
"learning_rate": 3.665441498206004e-05,
"loss": 4.1166,
"step": 6100
},
{
"epoch": 0.81,
"learning_rate": 3.643563489979873e-05,
"loss": 4.1468,
"step": 6200
},
{
"epoch": 0.83,
"learning_rate": 3.621685481753741e-05,
"loss": 4.1475,
"step": 6300
},
{
"epoch": 0.84,
"learning_rate": 3.59980747352761e-05,
"loss": 4.1488,
"step": 6400
},
{
"epoch": 0.85,
"learning_rate": 3.577929465301479e-05,
"loss": 4.0431,
"step": 6500
},
{
"epoch": 0.87,
"learning_rate": 3.556051457075348e-05,
"loss": 3.9611,
"step": 6600
},
{
"epoch": 0.88,
"learning_rate": 3.534173448849217e-05,
"loss": 4.1072,
"step": 6700
},
{
"epoch": 0.89,
"learning_rate": 3.512295440623086e-05,
"loss": 4.0247,
"step": 6800
},
{
"epoch": 0.91,
"learning_rate": 3.490417432396955e-05,
"loss": 3.9853,
"step": 6900
},
{
"epoch": 0.92,
"learning_rate": 3.468539424170823e-05,
"loss": 3.9586,
"step": 7000
},
{
"epoch": 0.93,
"learning_rate": 3.446661415944692e-05,
"loss": 4.0029,
"step": 7100
},
{
"epoch": 0.95,
"learning_rate": 3.4247834077185614e-05,
"loss": 4.0219,
"step": 7200
},
{
"epoch": 0.96,
"learning_rate": 3.4029053994924304e-05,
"loss": 3.9708,
"step": 7300
},
{
"epoch": 0.97,
"learning_rate": 3.3810273912662995e-05,
"loss": 3.9856,
"step": 7400
},
{
"epoch": 0.98,
"learning_rate": 3.3591493830401685e-05,
"loss": 3.9354,
"step": 7500
},
{
"epoch": 1.0,
"learning_rate": 3.337271374814037e-05,
"loss": 3.98,
"step": 7600
},
{
"epoch": 1.01,
"learning_rate": 3.315393366587906e-05,
"loss": 3.9338,
"step": 7700
},
{
"epoch": 1.02,
"learning_rate": 3.293515358361775e-05,
"loss": 3.9415,
"step": 7800
},
{
"epoch": 1.04,
"learning_rate": 3.271637350135644e-05,
"loss": 3.9926,
"step": 7900
},
{
"epoch": 1.05,
"learning_rate": 3.249759341909513e-05,
"loss": 3.9005,
"step": 8000
},
{
"epoch": 1.06,
"learning_rate": 3.227881333683382e-05,
"loss": 3.9296,
"step": 8100
},
{
"epoch": 1.08,
"learning_rate": 3.2060033254572505e-05,
"loss": 3.9252,
"step": 8200
},
{
"epoch": 1.09,
"learning_rate": 3.184125317231119e-05,
"loss": 3.8741,
"step": 8300
},
{
"epoch": 1.1,
"learning_rate": 3.162247309004988e-05,
"loss": 3.9018,
"step": 8400
},
{
"epoch": 1.12,
"learning_rate": 3.140369300778857e-05,
"loss": 3.8671,
"step": 8500
},
{
"epoch": 1.13,
"learning_rate": 3.118491292552726e-05,
"loss": 3.9379,
"step": 8600
},
{
"epoch": 1.14,
"learning_rate": 3.096613284326595e-05,
"loss": 3.8542,
"step": 8700
},
{
"epoch": 1.16,
"learning_rate": 3.074735276100464e-05,
"loss": 3.7767,
"step": 8800
},
{
"epoch": 1.17,
"learning_rate": 3.052857267874333e-05,
"loss": 3.7247,
"step": 8900
},
{
"epoch": 1.18,
"learning_rate": 3.0309792596482018e-05,
"loss": 3.7847,
"step": 9000
},
{
"epoch": 1.19,
"learning_rate": 3.0091012514220705e-05,
"loss": 3.7639,
"step": 9100
},
{
"epoch": 1.21,
"learning_rate": 2.9872232431959396e-05,
"loss": 3.7938,
"step": 9200
},
{
"epoch": 1.22,
"learning_rate": 2.9653452349698086e-05,
"loss": 3.7549,
"step": 9300
},
{
"epoch": 1.23,
"learning_rate": 2.9434672267436773e-05,
"loss": 3.774,
"step": 9400
},
{
"epoch": 1.25,
"learning_rate": 2.9215892185175464e-05,
"loss": 3.7728,
"step": 9500
},
{
"epoch": 1.26,
"learning_rate": 2.8997112102914154e-05,
"loss": 3.7448,
"step": 9600
},
{
"epoch": 1.27,
"learning_rate": 2.877833202065284e-05,
"loss": 3.7276,
"step": 9700
},
{
"epoch": 1.29,
"learning_rate": 2.8559551938391532e-05,
"loss": 3.7897,
"step": 9800
},
{
"epoch": 1.3,
"learning_rate": 2.8340771856130222e-05,
"loss": 3.7273,
"step": 9900
},
{
"epoch": 1.31,
"learning_rate": 2.812199177386891e-05,
"loss": 3.745,
"step": 10000
},
{
"epoch": 1.33,
"learning_rate": 2.7903211691607596e-05,
"loss": 3.7491,
"step": 10100
},
{
"epoch": 1.34,
"learning_rate": 2.7684431609346283e-05,
"loss": 3.8055,
"step": 10200
},
{
"epoch": 1.35,
"learning_rate": 2.7465651527084974e-05,
"loss": 3.6932,
"step": 10300
},
{
"epoch": 1.37,
"learning_rate": 2.7246871444823664e-05,
"loss": 3.7565,
"step": 10400
},
{
"epoch": 1.38,
"learning_rate": 2.702809136256235e-05,
"loss": 3.6891,
"step": 10500
},
{
"epoch": 1.39,
"learning_rate": 2.6809311280301042e-05,
"loss": 3.7535,
"step": 10600
},
{
"epoch": 1.4,
"learning_rate": 2.6590531198039732e-05,
"loss": 3.5967,
"step": 10700
},
{
"epoch": 1.42,
"learning_rate": 2.637175111577842e-05,
"loss": 3.637,
"step": 10800
},
{
"epoch": 1.43,
"learning_rate": 2.615297103351711e-05,
"loss": 3.6867,
"step": 10900
},
{
"epoch": 1.44,
"learning_rate": 2.59341909512558e-05,
"loss": 3.6911,
"step": 11000
},
{
"epoch": 1.46,
"learning_rate": 2.5715410868994487e-05,
"loss": 3.6094,
"step": 11100
},
{
"epoch": 1.47,
"learning_rate": 2.5496630786733178e-05,
"loss": 3.6662,
"step": 11200
},
{
"epoch": 1.48,
"learning_rate": 2.5277850704471868e-05,
"loss": 3.6195,
"step": 11300
},
{
"epoch": 1.5,
"learning_rate": 2.5059070622210555e-05,
"loss": 3.6519,
"step": 11400
},
{
"epoch": 1.51,
"learning_rate": 2.4840290539949242e-05,
"loss": 3.6181,
"step": 11500
},
{
"epoch": 1.52,
"learning_rate": 2.4621510457687933e-05,
"loss": 3.7262,
"step": 11600
},
{
"epoch": 1.54,
"learning_rate": 2.4402730375426623e-05,
"loss": 3.6216,
"step": 11700
},
{
"epoch": 1.55,
"learning_rate": 2.418395029316531e-05,
"loss": 3.613,
"step": 11800
},
{
"epoch": 1.56,
"learning_rate": 2.3965170210904e-05,
"loss": 3.6809,
"step": 11900
},
{
"epoch": 1.58,
"learning_rate": 2.374639012864269e-05,
"loss": 3.6462,
"step": 12000
},
{
"epoch": 1.59,
"learning_rate": 2.352761004638138e-05,
"loss": 3.6063,
"step": 12100
},
{
"epoch": 1.6,
"learning_rate": 2.330882996412007e-05,
"loss": 3.6304,
"step": 12200
},
{
"epoch": 1.61,
"learning_rate": 2.309004988185876e-05,
"loss": 3.6105,
"step": 12300
},
{
"epoch": 1.63,
"learning_rate": 2.2871269799597443e-05,
"loss": 3.5885,
"step": 12400
},
{
"epoch": 1.64,
"learning_rate": 2.2652489717336133e-05,
"loss": 3.6111,
"step": 12500
},
{
"epoch": 1.65,
"learning_rate": 2.2433709635074824e-05,
"loss": 3.572,
"step": 12600
},
{
"epoch": 1.67,
"learning_rate": 2.221492955281351e-05,
"loss": 3.6038,
"step": 12700
},
{
"epoch": 1.68,
"learning_rate": 2.19961494705522e-05,
"loss": 3.6314,
"step": 12800
},
{
"epoch": 1.69,
"learning_rate": 2.1777369388290892e-05,
"loss": 3.5771,
"step": 12900
},
{
"epoch": 1.71,
"learning_rate": 2.1558589306029582e-05,
"loss": 3.5381,
"step": 13000
},
{
"epoch": 1.72,
"learning_rate": 2.133980922376827e-05,
"loss": 3.5831,
"step": 13100
},
{
"epoch": 1.73,
"learning_rate": 2.112102914150696e-05,
"loss": 3.5452,
"step": 13200
},
{
"epoch": 1.75,
"learning_rate": 2.0902249059245647e-05,
"loss": 3.6384,
"step": 13300
},
{
"epoch": 1.76,
"learning_rate": 2.0683468976984334e-05,
"loss": 3.5737,
"step": 13400
},
{
"epoch": 1.77,
"learning_rate": 2.0464688894723024e-05,
"loss": 3.684,
"step": 13500
},
{
"epoch": 1.79,
"learning_rate": 2.0245908812461715e-05,
"loss": 3.6435,
"step": 13600
},
{
"epoch": 1.8,
"learning_rate": 2.0027128730200402e-05,
"loss": 3.5359,
"step": 13700
},
{
"epoch": 1.81,
"learning_rate": 1.9808348647939092e-05,
"loss": 3.5537,
"step": 13800
},
{
"epoch": 1.82,
"learning_rate": 1.9589568565677783e-05,
"loss": 3.5483,
"step": 13900
},
{
"epoch": 1.84,
"learning_rate": 1.937078848341647e-05,
"loss": 3.4914,
"step": 14000
},
{
"epoch": 1.85,
"learning_rate": 1.915200840115516e-05,
"loss": 3.4733,
"step": 14100
},
{
"epoch": 1.86,
"learning_rate": 1.8933228318893847e-05,
"loss": 3.5044,
"step": 14200
},
{
"epoch": 1.88,
"learning_rate": 1.8714448236632538e-05,
"loss": 3.5383,
"step": 14300
},
{
"epoch": 1.89,
"learning_rate": 1.8495668154371225e-05,
"loss": 3.5945,
"step": 14400
},
{
"epoch": 1.9,
"learning_rate": 1.8276888072109915e-05,
"loss": 3.5568,
"step": 14500
},
{
"epoch": 1.92,
"learning_rate": 1.8058107989848606e-05,
"loss": 3.4929,
"step": 14600
},
{
"epoch": 1.93,
"learning_rate": 1.7839327907587293e-05,
"loss": 3.5268,
"step": 14700
},
{
"epoch": 1.94,
"learning_rate": 1.7620547825325983e-05,
"loss": 3.5304,
"step": 14800
},
{
"epoch": 1.96,
"learning_rate": 1.7401767743064674e-05,
"loss": 3.4343,
"step": 14900
},
{
"epoch": 1.97,
"learning_rate": 1.718298766080336e-05,
"loss": 3.5578,
"step": 15000
},
{
"epoch": 1.98,
"learning_rate": 1.6964207578542048e-05,
"loss": 3.4089,
"step": 15100
},
{
"epoch": 2.0,
"learning_rate": 1.674542749628074e-05,
"loss": 3.4965,
"step": 15200
},
{
"epoch": 2.01,
"learning_rate": 1.652664741401943e-05,
"loss": 3.4984,
"step": 15300
},
{
"epoch": 2.02,
"learning_rate": 1.6307867331758116e-05,
"loss": 3.5046,
"step": 15400
},
{
"epoch": 2.03,
"learning_rate": 1.6089087249496806e-05,
"loss": 3.5016,
"step": 15500
},
{
"epoch": 2.05,
"learning_rate": 1.5870307167235497e-05,
"loss": 3.5667,
"step": 15600
},
{
"epoch": 2.06,
"learning_rate": 1.5651527084974184e-05,
"loss": 3.5435,
"step": 15700
},
{
"epoch": 2.07,
"learning_rate": 1.5432747002712874e-05,
"loss": 3.5086,
"step": 15800
},
{
"epoch": 2.09,
"learning_rate": 1.5213966920451563e-05,
"loss": 3.4831,
"step": 15900
},
{
"epoch": 2.1,
"learning_rate": 1.499518683819025e-05,
"loss": 3.3926,
"step": 16000
},
{
"epoch": 2.11,
"learning_rate": 1.4776406755928939e-05,
"loss": 3.4086,
"step": 16100
},
{
"epoch": 2.13,
"learning_rate": 1.455762667366763e-05,
"loss": 3.4987,
"step": 16200
},
{
"epoch": 2.14,
"learning_rate": 1.4338846591406318e-05,
"loss": 3.4697,
"step": 16300
},
{
"epoch": 2.15,
"learning_rate": 1.4120066509145009e-05,
"loss": 3.5231,
"step": 16400
},
{
"epoch": 2.17,
"learning_rate": 1.3901286426883698e-05,
"loss": 3.4806,
"step": 16500
},
{
"epoch": 2.18,
"learning_rate": 1.3682506344622386e-05,
"loss": 3.5263,
"step": 16600
},
{
"epoch": 2.19,
"learning_rate": 1.3463726262361077e-05,
"loss": 3.3846,
"step": 16700
},
{
"epoch": 2.21,
"learning_rate": 1.3244946180099765e-05,
"loss": 3.4415,
"step": 16800
},
{
"epoch": 2.22,
"learning_rate": 1.3026166097838454e-05,
"loss": 3.3833,
"step": 16900
},
{
"epoch": 2.23,
"learning_rate": 1.2807386015577141e-05,
"loss": 3.4666,
"step": 17000
},
{
"epoch": 2.24,
"learning_rate": 1.258860593331583e-05,
"loss": 3.4884,
"step": 17100
},
{
"epoch": 2.26,
"learning_rate": 1.236982585105452e-05,
"loss": 3.3259,
"step": 17200
},
{
"epoch": 2.27,
"learning_rate": 1.215104576879321e-05,
"loss": 3.4231,
"step": 17300
},
{
"epoch": 2.28,
"learning_rate": 1.1932265686531898e-05,
"loss": 3.4166,
"step": 17400
},
{
"epoch": 2.3,
"learning_rate": 1.1713485604270589e-05,
"loss": 3.4279,
"step": 17500
},
{
"epoch": 2.31,
"learning_rate": 1.1494705522009276e-05,
"loss": 3.4174,
"step": 17600
},
{
"epoch": 2.32,
"learning_rate": 1.1275925439747966e-05,
"loss": 3.5166,
"step": 17700
},
{
"epoch": 2.34,
"learning_rate": 1.1057145357486655e-05,
"loss": 3.4309,
"step": 17800
},
{
"epoch": 2.35,
"learning_rate": 1.0838365275225344e-05,
"loss": 3.4038,
"step": 17900
},
{
"epoch": 2.36,
"learning_rate": 1.0619585192964034e-05,
"loss": 3.3785,
"step": 18000
},
{
"epoch": 2.38,
"learning_rate": 1.0400805110702721e-05,
"loss": 3.4292,
"step": 18100
},
{
"epoch": 2.39,
"learning_rate": 1.0182025028441412e-05,
"loss": 3.4647,
"step": 18200
},
{
"epoch": 2.4,
"learning_rate": 9.9632449461801e-06,
"loss": 3.4627,
"step": 18300
},
{
"epoch": 2.42,
"learning_rate": 9.744464863918789e-06,
"loss": 3.3596,
"step": 18400
},
{
"epoch": 2.43,
"learning_rate": 9.52568478165748e-06,
"loss": 3.3967,
"step": 18500
},
{
"epoch": 2.44,
"learning_rate": 9.306904699396167e-06,
"loss": 3.4934,
"step": 18600
},
{
"epoch": 2.45,
"learning_rate": 9.088124617134857e-06,
"loss": 3.4232,
"step": 18700
},
{
"epoch": 2.47,
"learning_rate": 8.869344534873546e-06,
"loss": 3.3957,
"step": 18800
},
{
"epoch": 2.48,
"learning_rate": 8.650564452612235e-06,
"loss": 3.396,
"step": 18900
},
{
"epoch": 2.49,
"learning_rate": 8.431784370350923e-06,
"loss": 3.4012,
"step": 19000
},
{
"epoch": 2.51,
"learning_rate": 8.213004288089612e-06,
"loss": 3.3845,
"step": 19100
},
{
"epoch": 2.52,
"learning_rate": 7.994224205828303e-06,
"loss": 3.3873,
"step": 19200
},
{
"epoch": 2.53,
"learning_rate": 7.775444123566991e-06,
"loss": 3.4209,
"step": 19300
},
{
"epoch": 2.55,
"learning_rate": 7.55666404130568e-06,
"loss": 3.3871,
"step": 19400
},
{
"epoch": 2.56,
"learning_rate": 7.337883959044369e-06,
"loss": 3.4716,
"step": 19500
},
{
"epoch": 2.57,
"learning_rate": 7.119103876783058e-06,
"loss": 3.4281,
"step": 19600
},
{
"epoch": 2.59,
"learning_rate": 6.900323794521747e-06,
"loss": 3.3927,
"step": 19700
},
{
"epoch": 2.6,
"learning_rate": 6.681543712260437e-06,
"loss": 3.374,
"step": 19800
},
{
"epoch": 2.61,
"learning_rate": 6.462763629999125e-06,
"loss": 3.3668,
"step": 19900
},
{
"epoch": 2.63,
"learning_rate": 6.2439835477378135e-06,
"loss": 3.3673,
"step": 20000
},
{
"epoch": 2.64,
"learning_rate": 6.025203465476503e-06,
"loss": 3.3633,
"step": 20100
},
{
"epoch": 2.65,
"learning_rate": 5.806423383215193e-06,
"loss": 3.3235,
"step": 20200
},
{
"epoch": 2.66,
"learning_rate": 5.5876433009538815e-06,
"loss": 3.3816,
"step": 20300
},
{
"epoch": 2.68,
"learning_rate": 5.36886321869257e-06,
"loss": 3.3943,
"step": 20400
},
{
"epoch": 2.69,
"learning_rate": 5.150083136431259e-06,
"loss": 3.3534,
"step": 20500
},
{
"epoch": 2.7,
"learning_rate": 4.931303054169949e-06,
"loss": 3.4587,
"step": 20600
},
{
"epoch": 2.72,
"learning_rate": 4.7125229719086374e-06,
"loss": 3.3572,
"step": 20700
},
{
"epoch": 2.73,
"learning_rate": 4.493742889647327e-06,
"loss": 3.3056,
"step": 20800
},
{
"epoch": 2.74,
"learning_rate": 4.274962807386016e-06,
"loss": 3.3853,
"step": 20900
},
{
"epoch": 2.76,
"learning_rate": 4.0561827251247046e-06,
"loss": 3.4141,
"step": 21000
},
{
"epoch": 2.77,
"learning_rate": 3.837402642863394e-06,
"loss": 3.3369,
"step": 21100
},
{
"epoch": 2.78,
"learning_rate": 3.618622560602083e-06,
"loss": 3.366,
"step": 21200
},
{
"epoch": 2.8,
"learning_rate": 3.399842478340772e-06,
"loss": 3.4209,
"step": 21300
},
{
"epoch": 2.81,
"learning_rate": 3.181062396079461e-06,
"loss": 3.3659,
"step": 21400
},
{
"epoch": 2.82,
"learning_rate": 2.96228231381815e-06,
"loss": 3.3432,
"step": 21500
},
{
"epoch": 2.84,
"learning_rate": 2.7435022315568393e-06,
"loss": 3.4514,
"step": 21600
},
{
"epoch": 2.85,
"learning_rate": 2.524722149295528e-06,
"loss": 3.3064,
"step": 21700
},
{
"epoch": 2.86,
"learning_rate": 2.3059420670342172e-06,
"loss": 3.4154,
"step": 21800
},
{
"epoch": 2.87,
"learning_rate": 2.0871619847729064e-06,
"loss": 3.3934,
"step": 21900
},
{
"epoch": 2.89,
"learning_rate": 1.8683819025115952e-06,
"loss": 3.3657,
"step": 22000
},
{
"epoch": 2.9,
"learning_rate": 1.6496018202502846e-06,
"loss": 3.3707,
"step": 22100
},
{
"epoch": 2.91,
"learning_rate": 1.4308217379889735e-06,
"loss": 3.3213,
"step": 22200
},
{
"epoch": 2.93,
"learning_rate": 1.2120416557276627e-06,
"loss": 3.351,
"step": 22300
},
{
"epoch": 2.94,
"learning_rate": 9.932615734663517e-07,
"loss": 3.3376,
"step": 22400
},
{
"epoch": 2.95,
"learning_rate": 7.744814912050407e-07,
"loss": 3.2788,
"step": 22500
},
{
"epoch": 2.97,
"learning_rate": 5.557014089437298e-07,
"loss": 3.3773,
"step": 22600
},
{
"epoch": 2.98,
"learning_rate": 3.3692132668241884e-07,
"loss": 3.3724,
"step": 22700
},
{
"epoch": 2.99,
"learning_rate": 1.1814124442110792e-07,
"loss": 3.3345,
"step": 22800
},
{
"epoch": 3.0,
"step": 22854,
"total_flos": 1.2032870585175245e+17,
"train_loss": 3.9046004770976137,
"train_runtime": 22926.8282,
"train_samples_per_second": 19.935,
"train_steps_per_second": 0.997
}
],
"max_steps": 22854,
"num_train_epochs": 3,
"total_flos": 1.2032870585175245e+17,
"trial_name": null,
"trial_params": null
}