cube_to_round_box_146-neivonm3xj / trainer_state.json

Upload trainer_state.json with huggingface_hub

00ff434 verified 3 months ago

31.1 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 19.908045977011493,
	"eval_steps": 500,
	"global_step": 1732,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.11494252873563218,
	"grad_norm": 5.833221435546875,
	"learning_rate": 2.2988505747126437e-05,
	"loss": 1.1691,
	"step": 10
	},
	{
	"epoch": 0.22988505747126436,
	"grad_norm": 3.3890936374664307,
	"learning_rate": 4.597701149425287e-05,
	"loss": 0.4962,
	"step": 20
	},
	{
	"epoch": 0.3448275862068966,
	"grad_norm": 1.4635858535766602,
	"learning_rate": 6.896551724137931e-05,
	"loss": 0.2852,
	"step": 30
	},
	{
	"epoch": 0.45977011494252873,
	"grad_norm": 1.3263287544250488,
	"learning_rate": 9.195402298850575e-05,
	"loss": 0.2068,
	"step": 40
	},
	{
	"epoch": 0.5747126436781609,
	"grad_norm": 1.4933586120605469,
	"learning_rate": 0.00011494252873563218,
	"loss": 0.1671,
	"step": 50
	},
	{
	"epoch": 0.6896551724137931,
	"grad_norm": 3.461280584335327,
	"learning_rate": 0.00013793103448275863,
	"loss": 0.163,
	"step": 60
	},
	{
	"epoch": 0.8045977011494253,
	"grad_norm": 0.8741048574447632,
	"learning_rate": 0.00016091954022988506,
	"loss": 0.1625,
	"step": 70
	},
	{
	"epoch": 0.9195402298850575,
	"grad_norm": 0.9238091111183167,
	"learning_rate": 0.0001839080459770115,
	"loss": 0.1292,
	"step": 80
	},
	{
	"epoch": 1.0344827586206897,
	"grad_norm": 1.1964523792266846,
	"learning_rate": 0.00019999835873288997,
	"loss": 0.1137,
	"step": 90
	},
	{
	"epoch": 1.1494252873563218,
	"grad_norm": 0.6014072895050049,
	"learning_rate": 0.0001999691821496584,
	"loss": 0.1066,
	"step": 100
	},
	{
	"epoch": 1.264367816091954,
	"grad_norm": 0.6493586897850037,
	"learning_rate": 0.00019990354521250803,
	"loss": 0.1051,
	"step": 110
	},
	{
	"epoch": 1.3793103448275863,
	"grad_norm": 0.6839510202407837,
	"learning_rate": 0.00019980147186027586,
	"loss": 0.0926,
	"step": 120
	},
	{
	"epoch": 1.4942528735632183,
	"grad_norm": 0.5401099920272827,
	"learning_rate": 0.00019966299932074023,
	"loss": 0.0895,
	"step": 130
	},
	{
	"epoch": 1.6091954022988506,
	"grad_norm": 1.1563774347305298,
	"learning_rate": 0.000199488178097043,
	"loss": 0.0915,
	"step": 140
	},
	{
	"epoch": 1.7241379310344827,
	"grad_norm": 2.1474926471710205,
	"learning_rate": 0.00019927707194927066,
	"loss": 0.0853,
	"step": 150
	},
	{
	"epoch": 1.839080459770115,
	"grad_norm": 0.7267495393753052,
	"learning_rate": 0.00019902975787119956,
	"loss": 0.0873,
	"step": 160
	},
	{
	"epoch": 1.9540229885057472,
	"grad_norm": 0.5461205244064331,
	"learning_rate": 0.00019874632606221545,
	"loss": 0.0739,
	"step": 170
	},
	{
	"epoch": 2.0689655172413794,
	"grad_norm": 0.43381860852241516,
	"learning_rate": 0.00019842687989441604,
	"loss": 0.0682,
	"step": 180
	},
	{
	"epoch": 2.1839080459770113,
	"grad_norm": 0.619968593120575,
	"learning_rate": 0.00019807153587490963,
	"loss": 0.0725,
	"step": 190
	},
	{
	"epoch": 2.2988505747126435,
	"grad_norm": 0.5531813502311707,
	"learning_rate": 0.00019768042360332325,
	"loss": 0.0649,
	"step": 200
	},
	{
	"epoch": 2.413793103448276,
	"grad_norm": 0.4325454831123352,
	"learning_rate": 0.00019725368572453539,
	"loss": 0.0629,
	"step": 210
	},
	{
	"epoch": 2.528735632183908,
	"grad_norm": 1.132866382598877,
	"learning_rate": 0.00019679147787665126,
	"loss": 0.0597,
	"step": 220
	},
	{
	"epoch": 2.6436781609195403,
	"grad_norm": 0.5158783793449402,
	"learning_rate": 0.00019629396863423911,
	"loss": 0.0658,
	"step": 230
	},
	{
	"epoch": 2.7586206896551726,
	"grad_norm": 0.5275442600250244,
	"learning_rate": 0.0001957613394468484,
	"loss": 0.0624,
	"step": 240
	},
	{
	"epoch": 2.873563218390805,
	"grad_norm": 0.26212960481643677,
	"learning_rate": 0.0001951937845728321,
	"loss": 0.0565,
	"step": 250
	},
	{
	"epoch": 2.9885057471264367,
	"grad_norm": 0.4064450263977051,
	"learning_rate": 0.00019459151100849784,
	"loss": 0.0586,
	"step": 260
	},
	{
	"epoch": 3.103448275862069,
	"grad_norm": 0.497156023979187,
	"learning_rate": 0.0001939547384126128,
	"loss": 0.0569,
	"step": 270
	},
	{
	"epoch": 3.218390804597701,
	"grad_norm": 0.23238833248615265,
	"learning_rate": 0.00019328369902629083,
	"loss": 0.048,
	"step": 280
	},
	{
	"epoch": 3.3333333333333335,
	"grad_norm": 0.33057811856269836,
	"learning_rate": 0.00019257863758829035,
	"loss": 0.0508,
	"step": 290
	},
	{
	"epoch": 3.4482758620689653,
	"grad_norm": 0.2923976182937622,
	"learning_rate": 0.00019183981124575418,
	"loss": 0.059,
	"step": 300
	},
	{
	"epoch": 3.5632183908045976,
	"grad_norm": 0.40444961190223694,
	"learning_rate": 0.00019106748946042407,
	"loss": 0.0589,
	"step": 310
	},
	{
	"epoch": 3.67816091954023,
	"grad_norm": 0.3618530333042145,
	"learning_rate": 0.00019026195391036338,
	"loss": 0.0493,
	"step": 320
	},
	{
	"epoch": 3.793103448275862,
	"grad_norm": 0.2655580937862396,
	"learning_rate": 0.00018942349838722486,
	"loss": 0.0502,
	"step": 330
	},
	{
	"epoch": 3.9080459770114944,
	"grad_norm": 0.30788642168045044,
	"learning_rate": 0.0001885524286891002,
	"loss": 0.0562,
	"step": 340
	},
	{
	"epoch": 4.022988505747127,
	"grad_norm": 0.3801023066043854,
	"learning_rate": 0.00018764906250899027,
	"loss": 0.0536,
	"step": 350
	},
	{
	"epoch": 4.137931034482759,
	"grad_norm": 0.34299996495246887,
	"learning_rate": 0.00018671372931893773,
	"loss": 0.0583,
	"step": 360
	},
	{
	"epoch": 4.252873563218391,
	"grad_norm": 0.5825142860412598,
	"learning_rate": 0.0001857467702498633,
	"loss": 0.0465,
	"step": 370
	},
	{
	"epoch": 4.3678160919540225,
	"grad_norm": 0.46258264780044556,
	"learning_rate": 0.0001847485379671496,
	"loss": 0.0469,
	"step": 380
	},
	{
	"epoch": 4.482758620689655,
	"grad_norm": 0.23550616204738617,
	"learning_rate": 0.0001837193965420188,
	"loss": 0.0477,
	"step": 390
	},
	{
	"epoch": 4.597701149425287,
	"grad_norm": 0.609255850315094,
	"learning_rate": 0.00018265972131874987,
	"loss": 0.0467,
	"step": 400
	},
	{
	"epoch": 4.712643678160919,
	"grad_norm": 0.3701482117176056,
	"learning_rate": 0.00018156989877778461,
	"loss": 0.0471,
	"step": 410
	},
	{
	"epoch": 4.827586206896552,
	"grad_norm": 0.4651474356651306,
	"learning_rate": 0.00018045032639477194,
	"loss": 0.0434,
	"step": 420
	},
	{
	"epoch": 4.942528735632184,
	"grad_norm": 0.34028705954551697,
	"learning_rate": 0.00017930141249560233,
	"loss": 0.0452,
	"step": 430
	},
	{
	"epoch": 5.057471264367816,
	"grad_norm": 0.2748933732509613,
	"learning_rate": 0.00017812357610748488,
	"loss": 0.0413,
	"step": 440
	},
	{
	"epoch": 5.172413793103448,
	"grad_norm": 0.4612014591693878,
	"learning_rate": 0.00017691724680612118,
	"loss": 0.0423,
	"step": 450
	},
	{
	"epoch": 5.287356321839081,
	"grad_norm": 0.37961891293525696,
	"learning_rate": 0.00017568286455903258,
	"loss": 0.0422,
	"step": 460
	},
	{
	"epoch": 5.402298850574713,
	"grad_norm": 0.3245999813079834,
	"learning_rate": 0.00017442087956509665,
	"loss": 0.0396,
	"step": 470
	},
	{
	"epoch": 5.517241379310345,
	"grad_norm": 0.5230941772460938,
	"learning_rate": 0.00017313175209035268,
	"loss": 0.0405,
	"step": 480
	},
	{
	"epoch": 5.6321839080459775,
	"grad_norm": 0.2870311737060547,
	"learning_rate": 0.00017181595230013525,
	"loss": 0.0343,
	"step": 490
	},
	{
	"epoch": 5.747126436781609,
	"grad_norm": 0.2876773774623871,
	"learning_rate": 0.00017047396008759754,
	"loss": 0.0436,
	"step": 500
	},
	{
	"epoch": 5.862068965517241,
	"grad_norm": 0.4095667898654938,
	"learning_rate": 0.00016910626489868649,
	"loss": 0.0408,
	"step": 510
	},
	{
	"epoch": 5.977011494252873,
	"grad_norm": 0.377605140209198,
	"learning_rate": 0.00016771336555363418,
	"loss": 0.0415,
	"step": 520
	},
	{
	"epoch": 6.091954022988506,
	"grad_norm": 0.28248798847198486,
	"learning_rate": 0.00016629577006503009,
	"loss": 0.0386,
	"step": 530
	},
	{
	"epoch": 6.206896551724138,
	"grad_norm": 0.36199840903282166,
	"learning_rate": 0.0001648539954525409,
	"loss": 0.0405,
	"step": 540
	},
	{
	"epoch": 6.32183908045977,
	"grad_norm": 0.2778664529323578,
	"learning_rate": 0.00016338856755434503,
	"loss": 0.0359,
	"step": 550
	},
	{
	"epoch": 6.436781609195402,
	"grad_norm": 0.23418012261390686,
	"learning_rate": 0.00016190002083535122,
	"loss": 0.0382,
	"step": 560
	},
	{
	"epoch": 6.551724137931035,
	"grad_norm": 0.3027312457561493,
	"learning_rate": 0.00016038889819227045,
	"loss": 0.0394,
	"step": 570
	},
	{
	"epoch": 6.666666666666667,
	"grad_norm": 0.2858007550239563,
	"learning_rate": 0.00015885575075561326,
	"loss": 0.042,
	"step": 580
	},
	{
	"epoch": 6.781609195402299,
	"grad_norm": 0.2762337923049927,
	"learning_rate": 0.00015730113768868312,
	"loss": 0.039,
	"step": 590
	},
	{
	"epoch": 6.896551724137931,
	"grad_norm": 0.40732237696647644,
	"learning_rate": 0.0001557256259836412,
	"loss": 0.0404,
	"step": 600
	},
	{
	"epoch": 7.011494252873563,
	"grad_norm": 0.36847805976867676,
	"learning_rate": 0.00015412979025471488,
	"loss": 0.0368,
	"step": 610
	},
	{
	"epoch": 7.126436781609195,
	"grad_norm": 0.2492237538099289,
	"learning_rate": 0.00015251421252862707,
	"loss": 0.0336,
	"step": 620
	},
	{
	"epoch": 7.241379310344827,
	"grad_norm": 0.2626156210899353,
	"learning_rate": 0.00015087948203232156,
	"loss": 0.0352,
	"step": 630
	},
	{
	"epoch": 7.35632183908046,
	"grad_norm": 0.6365396976470947,
	"learning_rate": 0.00014922619497806277,
	"loss": 0.0342,
	"step": 640
	},
	{
	"epoch": 7.471264367816092,
	"grad_norm": 0.3000635802745819,
	"learning_rate": 0.00014755495434598745,
	"loss": 0.037,
	"step": 650
	},
	{
	"epoch": 7.586206896551724,
	"grad_norm": 0.21869853138923645,
	"learning_rate": 0.0001458663696641884,
	"loss": 0.0365,
	"step": 660
	},
	{
	"epoch": 7.7011494252873565,
	"grad_norm": 0.22284150123596191,
	"learning_rate": 0.0001441610567864096,
	"loss": 0.035,
	"step": 670
	},
	{
	"epoch": 7.816091954022989,
	"grad_norm": 0.267621248960495,
	"learning_rate": 0.00014243963766743495,
	"loss": 0.029,
	"step": 680
	},
	{
	"epoch": 7.931034482758621,
	"grad_norm": 0.2817297875881195,
	"learning_rate": 0.00014070274013625096,
	"loss": 0.0303,
	"step": 690
	},
	{
	"epoch": 8.045977011494253,
	"grad_norm": 0.3712492287158966,
	"learning_rate": 0.00013895099766706784,
	"loss": 0.0297,
	"step": 700
	},
	{
	"epoch": 8.160919540229886,
	"grad_norm": 0.4549995958805084,
	"learning_rate": 0.00013718504914828135,
	"loss": 0.033,
	"step": 710
	},
	{
	"epoch": 8.275862068965518,
	"grad_norm": 0.28695234656333923,
	"learning_rate": 0.00013540553864945976,
	"loss": 0.0306,
	"step": 720
	},
	{
	"epoch": 8.39080459770115,
	"grad_norm": 0.34577062726020813,
	"learning_rate": 0.00013361311518644172,
	"loss": 0.0325,
	"step": 730
	},
	{
	"epoch": 8.505747126436782,
	"grad_norm": 0.3214464783668518,
	"learning_rate": 0.00013180843248462973,
	"loss": 0.0337,
	"step": 740
	},
	{
	"epoch": 8.620689655172415,
	"grad_norm": 0.33310961723327637,
	"learning_rate": 0.00012999214874056595,
	"loss": 0.0344,
	"step": 750
	},
	{
	"epoch": 8.735632183908045,
	"grad_norm": 0.25606226921081543,
	"learning_rate": 0.00012816492638187762,
	"loss": 0.0396,
	"step": 760
	},
	{
	"epoch": 8.850574712643677,
	"grad_norm": 0.36330148577690125,
	"learning_rate": 0.00012632743182567905,
	"loss": 0.0348,
	"step": 770
	},
	{
	"epoch": 8.96551724137931,
	"grad_norm": 0.314394474029541,
	"learning_rate": 0.00012448033523551865,
	"loss": 0.0399,
	"step": 780
	},
	{
	"epoch": 9.080459770114942,
	"grad_norm": 0.28129351139068604,
	"learning_rate": 0.00012262431027695964,
	"loss": 0.0298,
	"step": 790
	},
	{
	"epoch": 9.195402298850574,
	"grad_norm": 0.256881982088089,
	"learning_rate": 0.00012076003387188353,
	"loss": 0.0292,
	"step": 800
	},
	{
	"epoch": 9.310344827586206,
	"grad_norm": 0.1919921189546585,
	"learning_rate": 0.00011888818595160584,
	"loss": 0.0269,
	"step": 810
	},
	{
	"epoch": 9.425287356321839,
	"grad_norm": 0.2719796895980835,
	"learning_rate": 0.00011700944920889436,
	"loss": 0.0265,
	"step": 820
	},
	{
	"epoch": 9.540229885057471,
	"grad_norm": 0.2269754707813263,
	"learning_rate": 0.00011512450884898022,
	"loss": 0.0316,
	"step": 830
	},
	{
	"epoch": 9.655172413793103,
	"grad_norm": 0.23504453897476196,
	"learning_rate": 0.00011323405233965256,
	"loss": 0.0273,
	"step": 840
	},
	{
	"epoch": 9.770114942528735,
	"grad_norm": 0.22570957243442535,
	"learning_rate": 0.00011133876916052821,
	"loss": 0.0304,
	"step": 850
	},
	{
	"epoch": 9.885057471264368,
	"grad_norm": 0.19824576377868652,
	"learning_rate": 0.00010943935055158734,
	"loss": 0.0283,
	"step": 860
	},
	{
	"epoch": 10.0,
	"grad_norm": 0.41852012276649475,
	"learning_rate": 0.00010753648926106723,
	"loss": 0.0319,
	"step": 870
	},
	{
	"epoch": 10.114942528735632,
	"grad_norm": 0.20548714697360992,
	"learning_rate": 0.00010563087929280613,
	"loss": 0.0285,
	"step": 880
	},
	{
	"epoch": 10.229885057471265,
	"grad_norm": 0.22767336666584015,
	"learning_rate": 0.00010372321565312872,
	"loss": 0.031,
	"step": 890
	},
	{
	"epoch": 10.344827586206897,
	"grad_norm": 0.20542040467262268,
	"learning_rate": 0.00010181419409736647,
	"loss": 0.0316,
	"step": 900
	},
	{
	"epoch": 10.459770114942529,
	"grad_norm": 0.3105849027633667,
	"learning_rate": 9.990451087610448e-05,
	"loss": 0.027,
	"step": 910
	},
	{
	"epoch": 10.574712643678161,
	"grad_norm": 0.31816890835762024,
	"learning_rate": 9.799486248124775e-05,
	"loss": 0.025,
	"step": 920
	},
	{
	"epoch": 10.689655172413794,
	"grad_norm": 0.3295416235923767,
	"learning_rate": 9.608594539199957e-05,
	"loss": 0.0247,
	"step": 930
	},
	{
	"epoch": 10.804597701149426,
	"grad_norm": 0.17071272432804108,
	"learning_rate": 9.417845582084448e-05,
	"loss": 0.0291,
	"step": 940
	},
	{
	"epoch": 10.919540229885058,
	"grad_norm": 0.189552441239357,
	"learning_rate": 9.227308945962827e-05,
	"loss": 0.0243,
	"step": 950
	},
	{
	"epoch": 11.03448275862069,
	"grad_norm": 0.3179641664028168,
	"learning_rate": 9.037054122582839e-05,
	"loss": 0.0308,
	"step": 960
	},
	{
	"epoch": 11.149425287356323,
	"grad_norm": 0.3051457703113556,
	"learning_rate": 8.847150500910618e-05,
	"loss": 0.0275,
	"step": 970
	},
	{
	"epoch": 11.264367816091955,
	"grad_norm": 0.29757606983184814,
	"learning_rate": 8.657667341823448e-05,
	"loss": 0.0264,
	"step": 980
	},
	{
	"epoch": 11.379310344827585,
	"grad_norm": 0.2845855951309204,
	"learning_rate": 8.4686737528492e-05,
	"loss": 0.0249,
	"step": 990
	},
	{
	"epoch": 11.494252873563218,
	"grad_norm": 0.239737406373024,
	"learning_rate": 8.280238662961728e-05,
	"loss": 0.027,
	"step": 1000
	},
	{
	"epoch": 11.60919540229885,
	"grad_norm": 0.2692360281944275,
	"learning_rate": 8.092430797441364e-05,
	"loss": 0.0216,
	"step": 1010
	},
	{
	"epoch": 11.724137931034482,
	"grad_norm": 0.18495500087738037,
	"learning_rate": 7.905318652809728e-05,
	"loss": 0.0255,
	"step": 1020
	},
	{
	"epoch": 11.839080459770114,
	"grad_norm": 0.2230875939130783,
	"learning_rate": 7.718970471847923e-05,
	"loss": 0.0262,
	"step": 1030
	},
	{
	"epoch": 11.954022988505747,
	"grad_norm": 0.14376775920391083,
	"learning_rate": 7.53345421870735e-05,
	"loss": 0.0209,
	"step": 1040
	},
	{
	"epoch": 12.068965517241379,
	"grad_norm": 0.20623371005058289,
	"learning_rate": 7.348837554122057e-05,
	"loss": 0.0192,
	"step": 1050
	},
	{
	"epoch": 12.183908045977011,
	"grad_norm": 0.27209600806236267,
	"learning_rate": 7.165187810731823e-05,
	"loss": 0.0208,
	"step": 1060
	},
	{
	"epoch": 12.298850574712644,
	"grad_norm": 0.19447851181030273,
	"learning_rate": 6.982571968524847e-05,
	"loss": 0.0201,
	"step": 1070
	},
	{
	"epoch": 12.413793103448276,
	"grad_norm": 0.18613241612911224,
	"learning_rate": 6.801056630409098e-05,
	"loss": 0.0248,
	"step": 1080
	},
	{
	"epoch": 12.528735632183908,
	"grad_norm": 0.24156583845615387,
	"learning_rate": 6.620707997921157e-05,
	"loss": 0.0197,
	"step": 1090
	},
	{
	"epoch": 12.64367816091954,
	"grad_norm": 0.16912145912647247,
	"learning_rate": 6.441591847081476e-05,
	"loss": 0.022,
	"step": 1100
	},
	{
	"epoch": 12.758620689655173,
	"grad_norm": 0.14165754616260529,
	"learning_rate": 6.263773504404801e-05,
	"loss": 0.0199,
	"step": 1110
	},
	{
	"epoch": 12.873563218390805,
	"grad_norm": 0.3424724340438843,
	"learning_rate": 6.087317823074565e-05,
	"loss": 0.0209,
	"step": 1120
	},
	{
	"epoch": 12.988505747126437,
	"grad_norm": 0.2658204138278961,
	"learning_rate": 5.912289159289883e-05,
	"loss": 0.0242,
	"step": 1130
	},
	{
	"epoch": 13.10344827586207,
	"grad_norm": 0.21321730315685272,
	"learning_rate": 5.7387513487938386e-05,
	"loss": 0.0216,
	"step": 1140
	},
	{
	"epoch": 13.218390804597702,
	"grad_norm": 0.2854823172092438,
	"learning_rate": 5.566767683591553e-05,
	"loss": 0.0227,
	"step": 1150
	},
	{
	"epoch": 13.333333333333334,
	"grad_norm": 0.28919658064842224,
	"learning_rate": 5.396400888866601e-05,
	"loss": 0.0195,
	"step": 1160
	},
	{
	"epoch": 13.448275862068966,
	"grad_norm": 0.22510255873203278,
	"learning_rate": 5.2277131001041125e-05,
	"loss": 0.0241,
	"step": 1170
	},
	{
	"epoch": 13.563218390804598,
	"grad_norm": 0.21545900404453278,
	"learning_rate": 5.060765840429e-05,
	"loss": 0.023,
	"step": 1180
	},
	{
	"epoch": 13.678160919540229,
	"grad_norm": 0.20618782937526703,
	"learning_rate": 4.8956199981674656e-05,
	"loss": 0.0181,
	"step": 1190
	},
	{
	"epoch": 13.793103448275861,
	"grad_norm": 0.22331970930099487,
	"learning_rate": 4.7323358046400844e-05,
	"loss": 0.0212,
	"step": 1200
	},
	{
	"epoch": 13.908045977011493,
	"grad_norm": 0.14791706204414368,
	"learning_rate": 4.570972812194457e-05,
	"loss": 0.0195,
	"step": 1210
	},
	{
	"epoch": 14.022988505747126,
	"grad_norm": 0.1526448130607605,
	"learning_rate": 4.4115898724855455e-05,
	"loss": 0.0188,
	"step": 1220
	},
	{
	"epoch": 14.137931034482758,
	"grad_norm": 0.18956783413887024,
	"learning_rate": 4.254245115011506e-05,
	"loss": 0.0188,
	"step": 1230
	},
	{
	"epoch": 14.25287356321839,
	"grad_norm": 0.1313301920890808,
	"learning_rate": 4.098995925912972e-05,
	"loss": 0.019,
	"step": 1240
	},
	{
	"epoch": 14.367816091954023,
	"grad_norm": 0.13764789700508118,
	"learning_rate": 3.945898927043372e-05,
	"loss": 0.0175,
	"step": 1250
	},
	{
	"epoch": 14.482758620689655,
	"grad_norm": 0.19556942582130432,
	"learning_rate": 3.7950099553180804e-05,
	"loss": 0.0196,
	"step": 1260
	},
	{
	"epoch": 14.597701149425287,
	"grad_norm": 0.14027345180511475,
	"learning_rate": 3.646384042349764e-05,
	"loss": 0.0177,
	"step": 1270
	},
	{
	"epoch": 14.71264367816092,
	"grad_norm": 0.2918284833431244,
	"learning_rate": 3.500075394377511e-05,
	"loss": 0.0204,
	"step": 1280
	},
	{
	"epoch": 14.827586206896552,
	"grad_norm": 0.12948164343833923,
	"learning_rate": 3.3561373724969224e-05,
	"loss": 0.0188,
	"step": 1290
	},
	{
	"epoch": 14.942528735632184,
	"grad_norm": 0.1773224174976349,
	"learning_rate": 3.214622473198492e-05,
	"loss": 0.0212,
	"step": 1300
	},
	{
	"epoch": 15.057471264367816,
	"grad_norm": 0.29863160848617554,
	"learning_rate": 3.075582309221289e-05,
	"loss": 0.0157,
	"step": 1310
	},
	{
	"epoch": 15.172413793103448,
	"grad_norm": 0.18764474987983704,
	"learning_rate": 2.939067590728972e-05,
	"loss": 0.0175,
	"step": 1320
	},
	{
	"epoch": 15.28735632183908,
	"grad_norm": 0.16273990273475647,
	"learning_rate": 2.8051281068149803e-05,
	"loss": 0.0135,
	"step": 1330
	},
	{
	"epoch": 15.402298850574713,
	"grad_norm": 0.25088945031166077,
	"learning_rate": 2.673812707343669e-05,
	"loss": 0.0242,
	"step": 1340
	},
	{
	"epoch": 15.517241379310345,
	"grad_norm": 0.25521960854530334,
	"learning_rate": 2.545169285133965e-05,
	"loss": 0.0188,
	"step": 1350
	},
	{
	"epoch": 15.632183908045977,
	"grad_norm": 0.15780223906040192,
	"learning_rate": 2.4192447584921195e-05,
	"loss": 0.0194,
	"step": 1360
	},
	{
	"epoch": 15.74712643678161,
	"grad_norm": 0.13658417761325836,
	"learning_rate": 2.296085054099828e-05,
	"loss": 0.0179,
	"step": 1370
	},
	{
	"epoch": 15.862068965517242,
	"grad_norm": 0.14593394100666046,
	"learning_rate": 2.175735090264058e-05,
	"loss": 0.016,
	"step": 1380
	},
	{
	"epoch": 15.977011494252874,
	"grad_norm": 0.20093883574008942,
	"learning_rate": 2.0582387605346088e-05,
	"loss": 0.0157,
	"step": 1390
	},
	{
	"epoch": 16.091954022988507,
	"grad_norm": 0.22261527180671692,
	"learning_rate": 1.943638917695453e-05,
	"loss": 0.0175,
	"step": 1400
	},
	{
	"epoch": 16.20689655172414,
	"grad_norm": 0.17486433684825897,
	"learning_rate": 1.831977358135625e-05,
	"loss": 0.0166,
	"step": 1410
	},
	{
	"epoch": 16.32183908045977,
	"grad_norm": 0.2138216346502304,
	"learning_rate": 1.723294806605428e-05,
	"loss": 0.0146,
	"step": 1420
	},
	{
	"epoch": 16.436781609195403,
	"grad_norm": 0.20112960040569305,
	"learning_rate": 1.6176309013634517e-05,
	"loss": 0.0159,
	"step": 1430
	},
	{
	"epoch": 16.551724137931036,
	"grad_norm": 0.15377485752105713,
	"learning_rate": 1.5150241797198883e-05,
	"loss": 0.016,
	"step": 1440
	},
	{
	"epoch": 16.666666666666668,
	"grad_norm": 0.23132722079753876,
	"learning_rate": 1.415512063981339e-05,
	"loss": 0.0134,
	"step": 1450
	},
	{
	"epoch": 16.7816091954023,
	"grad_norm": 0.15262600779533386,
	"learning_rate": 1.3191308478023212e-05,
	"loss": 0.017,
	"step": 1460
	},
	{
	"epoch": 16.896551724137932,
	"grad_norm": 0.0991855040192604,
	"learning_rate": 1.2259156829483842e-05,
	"loss": 0.0167,
	"step": 1470
	},
	{
	"epoch": 17.011494252873565,
	"grad_norm": 0.12278055399656296,
	"learning_rate": 1.1359005664756994e-05,
	"loss": 0.0146,
	"step": 1480
	},
	{
	"epoch": 17.126436781609197,
	"grad_norm": 0.17124158143997192,
	"learning_rate": 1.0491183283317997e-05,
	"loss": 0.0173,
	"step": 1490
	},
	{
	"epoch": 17.24137931034483,
	"grad_norm": 0.1300356686115265,
	"learning_rate": 9.656006193819633e-06,
	"loss": 0.0143,
	"step": 1500
	},
	{
	"epoch": 17.35632183908046,
	"grad_norm": 0.17946338653564453,
	"learning_rate": 8.853778998656537e-06,
	"loss": 0.0154,
	"step": 1510
	},
	{
	"epoch": 17.47126436781609,
	"grad_norm": 0.28736400604248047,
	"learning_rate": 8.084794282871689e-06,
	"loss": 0.0166,
	"step": 1520
	},
	{
	"epoch": 17.586206896551722,
	"grad_norm": 0.13112574815750122,
	"learning_rate": 7.3493325074461165e-06,
	"loss": 0.0132,
	"step": 1530
	},
	{
	"epoch": 17.701149425287355,
	"grad_norm": 0.12864838540554047,
	"learning_rate": 6.647661907010183e-06,
	"loss": 0.0171,
	"step": 1540
	},
	{
	"epoch": 17.816091954022987,
	"grad_norm": 0.16958807408809662,
	"learning_rate": 5.980038392014309e-06,
	"loss": 0.0161,
	"step": 1550
	},
	{
	"epoch": 17.93103448275862,
	"grad_norm": 0.36121729016304016,
	"learning_rate": 5.3467054553941405e-06,
	"loss": 0.0158,
	"step": 1560
	},
	{
	"epoch": 18.04597701149425,
	"grad_norm": 0.2107989490032196,
	"learning_rate": 4.7478940837649924e-06,
	"loss": 0.0147,
	"step": 1570
	},
	{
	"epoch": 18.160919540229884,
	"grad_norm": 0.15654149651527405,
	"learning_rate": 4.183822673177229e-06,
	"loss": 0.0164,
	"step": 1580
	},
	{
	"epoch": 18.275862068965516,
	"grad_norm": 0.1438828557729721,
	"learning_rate": 3.6546969494637986e-06,
	"loss": 0.0131,
	"step": 1590
	},
	{
	"epoch": 18.39080459770115,
	"grad_norm": 0.2543192207813263,
	"learning_rate": 3.16070989320868e-06,
	"loss": 0.0157,
	"step": 1600
	},
	{
	"epoch": 18.50574712643678,
	"grad_norm": 0.13453112542629242,
	"learning_rate": 2.702041669363875e-06,
	"loss": 0.017,
	"step": 1610
	},
	{
	"epoch": 18.620689655172413,
	"grad_norm": 0.16369780898094177,
	"learning_rate": 2.2788595615403474e-06,
	"loss": 0.0157,
	"step": 1620
	},
	{
	"epoch": 18.735632183908045,
	"grad_norm": 0.14639818668365479,
	"learning_rate": 1.8913179109969482e-06,
	"loss": 0.0122,
	"step": 1630
	},
	{
	"epoch": 18.850574712643677,
	"grad_norm": 0.23813354969024658,
	"learning_rate": 1.5395580603498328e-06,
	"loss": 0.0157,
	"step": 1640
	},
	{
	"epoch": 18.96551724137931,
	"grad_norm": 0.15577834844589233,
	"learning_rate": 1.2237083020224526e-06,
	"loss": 0.0144,
	"step": 1650
	},
	{
	"epoch": 19.080459770114942,
	"grad_norm": 0.06880059838294983,
	"learning_rate": 9.438838314553056e-07,
	"loss": 0.0109,
	"step": 1660
	},
	{
	"epoch": 19.195402298850574,
	"grad_norm": 0.19819270074367523,
	"learning_rate": 7.001867050923095e-07,
	"loss": 0.0134,
	"step": 1670
	},
	{
	"epoch": 19.310344827586206,
	"grad_norm": 0.10673543065786362,
	"learning_rate": 4.92705803159188e-07,
	"loss": 0.0155,
	"step": 1680
	},
	{
	"epoch": 19.42528735632184,
	"grad_norm": 0.16529639065265656,
	"learning_rate": 3.2151679724748975e-07,
	"loss": 0.0175,
	"step": 1690
	},
	{
	"epoch": 19.54022988505747,
	"grad_norm": 0.1206677109003067,
	"learning_rate": 1.8668212271585327e-07,
	"loss": 0.0188,
	"step": 1700
	},
	{
	"epoch": 19.655172413793103,
	"grad_norm": 0.10180158913135529,
	"learning_rate": 8.825095591891152e-08,
	"loss": 0.0158,
	"step": 1710
	},
	{
	"epoch": 19.770114942528735,
	"grad_norm": 0.14544282853603363,
	"learning_rate": 2.625919627188278e-08,
	"loss": 0.015,
	"step": 1720
	},
	{
	"epoch": 19.885057471264368,
	"grad_norm": 0.14054107666015625,
	"learning_rate": 7.294531574553176e-10,
	"loss": 0.0139,
	"step": 1730
	},
	{
	"epoch": 19.908045977011493,
	"step": 1732,
	"total_flos": 2.431760592612004e+17,
	"train_loss": 0.04785273781403314,
	"train_runtime": 1980.9382,
	"train_samples_per_second": 55.957,
	"train_steps_per_second": 0.874
	}
	],
	"logging_steps": 10,
	"max_steps": 1732,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 20,
	"save_steps": 10000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2.431760592612004e+17,
	"train_batch_size": 64,
	"trial_name": null,
	"trial_params": null
	}