nebo1337-GetTheRubber-l5ydy3nt7g / trainer_state.json

Upload trainer_state.json with huggingface_hub

cac37b5 verified 3 months ago

26.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 19.931506849315067,
	"eval_steps": 500,
	"global_step": 1455,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.136986301369863,
	"grad_norm": 7.088261127471924,
	"learning_rate": 2.7397260273972603e-05,
	"loss": 1.4774,
	"step": 10
	},
	{
	"epoch": 0.273972602739726,
	"grad_norm": 3.0658137798309326,
	"learning_rate": 5.479452054794521e-05,
	"loss": 0.5535,
	"step": 20
	},
	{
	"epoch": 0.410958904109589,
	"grad_norm": 1.415347695350647,
	"learning_rate": 8.219178082191781e-05,
	"loss": 0.2769,
	"step": 30
	},
	{
	"epoch": 0.547945205479452,
	"grad_norm": 1.2462260723114014,
	"learning_rate": 0.00010958904109589041,
	"loss": 0.2071,
	"step": 40
	},
	{
	"epoch": 0.684931506849315,
	"grad_norm": 1.1219278573989868,
	"learning_rate": 0.000136986301369863,
	"loss": 0.179,
	"step": 50
	},
	{
	"epoch": 0.821917808219178,
	"grad_norm": 2.4104228019714355,
	"learning_rate": 0.00016438356164383562,
	"loss": 0.1587,
	"step": 60
	},
	{
	"epoch": 0.958904109589041,
	"grad_norm": 1.2239787578582764,
	"learning_rate": 0.0001917808219178082,
	"loss": 0.1366,
	"step": 70
	},
	{
	"epoch": 1.095890410958904,
	"grad_norm": 0.9942715167999268,
	"learning_rate": 0.00019998733979961563,
	"loss": 0.1218,
	"step": 80
	},
	{
	"epoch": 1.2328767123287672,
	"grad_norm": 0.6293880939483643,
	"learning_rate": 0.0001999253383717226,
	"loss": 0.1168,
	"step": 90
	},
	{
	"epoch": 1.36986301369863,
	"grad_norm": 0.7170248031616211,
	"learning_rate": 0.00019981170237143067,
	"loss": 0.1052,
	"step": 100
	},
	{
	"epoch": 1.5068493150684932,
	"grad_norm": 0.7464343905448914,
	"learning_rate": 0.00019964649051804355,
	"loss": 0.1066,
	"step": 110
	},
	{
	"epoch": 1.643835616438356,
	"grad_norm": 0.6828764081001282,
	"learning_rate": 0.000199429788181734,
	"loss": 0.1057,
	"step": 120
	},
	{
	"epoch": 1.7808219178082192,
	"grad_norm": 0.6028720736503601,
	"learning_rate": 0.0001991617073394306,
	"loss": 0.0843,
	"step": 130
	},
	{
	"epoch": 1.9178082191780823,
	"grad_norm": 0.5440357327461243,
	"learning_rate": 0.00019884238651695556,
	"loss": 0.0948,
	"step": 140
	},
	{
	"epoch": 2.0547945205479454,
	"grad_norm": 0.8612964749336243,
	"learning_rate": 0.00019847199071744415,
	"loss": 0.085,
	"step": 150
	},
	{
	"epoch": 2.191780821917808,
	"grad_norm": 0.889124870300293,
	"learning_rate": 0.00019805071133608242,
	"loss": 0.0962,
	"step": 160
	},
	{
	"epoch": 2.328767123287671,
	"grad_norm": 0.45466411113739014,
	"learning_rate": 0.0001975787660612072,
	"loss": 0.0763,
	"step": 170
	},
	{
	"epoch": 2.4657534246575343,
	"grad_norm": 0.42088282108306885,
	"learning_rate": 0.00019705639876181969,
	"loss": 0.0635,
	"step": 180
	},
	{
	"epoch": 2.602739726027397,
	"grad_norm": 0.5170985460281372,
	"learning_rate": 0.00019648387936157068,
	"loss": 0.0726,
	"step": 190
	},
	{
	"epoch": 2.73972602739726,
	"grad_norm": 0.4313249886035919,
	"learning_rate": 0.00019586150369928245,
	"loss": 0.0669,
	"step": 200
	},
	{
	"epoch": 2.8767123287671232,
	"grad_norm": 0.3355115056037903,
	"learning_rate": 0.00019518959337607957,
	"loss": 0.0682,
	"step": 210
	},
	{
	"epoch": 3.0136986301369864,
	"grad_norm": 0.34427109360694885,
	"learning_rate": 0.0001944684955892075,
	"loss": 0.0638,
	"step": 220
	},
	{
	"epoch": 3.1506849315068495,
	"grad_norm": 0.2929873466491699,
	"learning_rate": 0.0001936985829526247,
	"loss": 0.0632,
	"step": 230
	},
	{
	"epoch": 3.287671232876712,
	"grad_norm": 0.3884938657283783,
	"learning_rate": 0.00019288025330446126,
	"loss": 0.0655,
	"step": 240
	},
	{
	"epoch": 3.4246575342465753,
	"grad_norm": 0.27399152517318726,
	"learning_rate": 0.00019201392950144363,
	"loss": 0.0533,
	"step": 250
	},
	{
	"epoch": 3.5616438356164384,
	"grad_norm": 0.2924444079399109,
	"learning_rate": 0.0001911000592003909,
	"loss": 0.0589,
	"step": 260
	},
	{
	"epoch": 3.6986301369863015,
	"grad_norm": 0.43013861775398254,
	"learning_rate": 0.00019013911462689668,
	"loss": 0.0615,
	"step": 270
	},
	{
	"epoch": 3.8356164383561646,
	"grad_norm": 0.5247001647949219,
	"learning_rate": 0.000189131592331315,
	"loss": 0.0583,
	"step": 280
	},
	{
	"epoch": 3.9726027397260273,
	"grad_norm": 0.5796880722045898,
	"learning_rate": 0.00018807801293217735,
	"loss": 0.0556,
	"step": 290
	},
	{
	"epoch": 4.109589041095891,
	"grad_norm": 0.5179729461669922,
	"learning_rate": 0.00018697892084717238,
	"loss": 0.056,
	"step": 300
	},
	{
	"epoch": 4.2465753424657535,
	"grad_norm": 0.42960262298583984,
	"learning_rate": 0.00018583488401182843,
	"loss": 0.0637,
	"step": 310
	},
	{
	"epoch": 4.383561643835616,
	"grad_norm": 0.3196163773536682,
	"learning_rate": 0.0001846464935860431,
	"loss": 0.0518,
	"step": 320
	},
	{
	"epoch": 4.52054794520548,
	"grad_norm": 0.4424096643924713,
	"learning_rate": 0.0001834143636486124,
	"loss": 0.0524,
	"step": 330
	},
	{
	"epoch": 4.657534246575342,
	"grad_norm": 0.50010746717453,
	"learning_rate": 0.00018213913087991685,
	"loss": 0.0629,
	"step": 340
	},
	{
	"epoch": 4.794520547945205,
	"grad_norm": 0.4036540389060974,
	"learning_rate": 0.00018082145423292868,
	"loss": 0.0531,
	"step": 350
	},
	{
	"epoch": 4.931506849315069,
	"grad_norm": 0.36036092042922974,
	"learning_rate": 0.0001794620145927101,
	"loss": 0.0556,
	"step": 360
	},
	{
	"epoch": 5.068493150684931,
	"grad_norm": 0.22472509741783142,
	"learning_rate": 0.00017806151442457827,
	"loss": 0.0446,
	"step": 370
	},
	{
	"epoch": 5.205479452054795,
	"grad_norm": 0.3514921963214874,
	"learning_rate": 0.00017662067741111974,
	"loss": 0.0443,
	"step": 380
	},
	{
	"epoch": 5.342465753424658,
	"grad_norm": 0.2920095920562744,
	"learning_rate": 0.00017514024807824055,
	"loss": 0.0451,
	"step": 390
	},
	{
	"epoch": 5.47945205479452,
	"grad_norm": 0.21051590144634247,
	"learning_rate": 0.00017362099141044626,
	"loss": 0.0476,
	"step": 400
	},
	{
	"epoch": 5.616438356164384,
	"grad_norm": 0.36196619272232056,
	"learning_rate": 0.00017206369245555036,
	"loss": 0.0521,
	"step": 410
	},
	{
	"epoch": 5.7534246575342465,
	"grad_norm": 0.3503723442554474,
	"learning_rate": 0.0001704691559190155,
	"loss": 0.0472,
	"step": 420
	},
	{
	"epoch": 5.890410958904109,
	"grad_norm": 0.3881896734237671,
	"learning_rate": 0.0001688382057481364,
	"loss": 0.0537,
	"step": 430
	},
	{
	"epoch": 6.027397260273973,
	"grad_norm": 0.29409492015838623,
	"learning_rate": 0.00016717168470628077,
	"loss": 0.0436,
	"step": 440
	},
	{
	"epoch": 6.164383561643835,
	"grad_norm": 0.2455558031797409,
	"learning_rate": 0.0001654704539374066,
	"loss": 0.0429,
	"step": 450
	},
	{
	"epoch": 6.301369863013699,
	"grad_norm": 0.30749672651290894,
	"learning_rate": 0.00016373539252108202,
	"loss": 0.042,
	"step": 460
	},
	{
	"epoch": 6.438356164383562,
	"grad_norm": 0.4117829501628876,
	"learning_rate": 0.00016196739701823716,
	"loss": 0.0422,
	"step": 470
	},
	{
	"epoch": 6.575342465753424,
	"grad_norm": 0.3047957718372345,
	"learning_rate": 0.00016016738100788297,
	"loss": 0.0456,
	"step": 480
	},
	{
	"epoch": 6.712328767123288,
	"grad_norm": 0.3104310631752014,
	"learning_rate": 0.00015833627461503595,
	"loss": 0.0405,
	"step": 490
	},
	{
	"epoch": 6.8493150684931505,
	"grad_norm": 0.3713166415691376,
	"learning_rate": 0.0001564750240300934,
	"loss": 0.0451,
	"step": 500
	},
	{
	"epoch": 6.986301369863014,
	"grad_norm": 0.23804673552513123,
	"learning_rate": 0.00015458459101990693,
	"loss": 0.0387,
	"step": 510
	},
	{
	"epoch": 7.123287671232877,
	"grad_norm": 0.4476951062679291,
	"learning_rate": 0.00015266595243080714,
	"loss": 0.0406,
	"step": 520
	},
	{
	"epoch": 7.260273972602739,
	"grad_norm": 0.27973777055740356,
	"learning_rate": 0.00015072009968383656,
	"loss": 0.0464,
	"step": 530
	},
	{
	"epoch": 7.397260273972603,
	"grad_norm": 0.3597777783870697,
	"learning_rate": 0.00014874803826245089,
	"loss": 0.0459,
	"step": 540
	},
	{
	"epoch": 7.534246575342466,
	"grad_norm": 0.27027377486228943,
	"learning_rate": 0.00014675078719295415,
	"loss": 0.0375,
	"step": 550
	},
	{
	"epoch": 7.671232876712329,
	"grad_norm": 0.27681443095207214,
	"learning_rate": 0.00014472937851793557,
	"loss": 0.0421,
	"step": 560
	},
	{
	"epoch": 7.808219178082192,
	"grad_norm": 0.3312411904335022,
	"learning_rate": 0.00014268485676298078,
	"loss": 0.048,
	"step": 570
	},
	{
	"epoch": 7.945205479452055,
	"grad_norm": 0.2358381599187851,
	"learning_rate": 0.0001406182783969324,
	"loss": 0.0409,
	"step": 580
	},
	{
	"epoch": 8.082191780821917,
	"grad_norm": 0.19072838127613068,
	"learning_rate": 0.00013853071128597924,
	"loss": 0.0417,
	"step": 590
	},
	{
	"epoch": 8.219178082191782,
	"grad_norm": 0.3328644931316376,
	"learning_rate": 0.0001364232341418564,
	"loss": 0.0397,
	"step": 600
	},
	{
	"epoch": 8.356164383561644,
	"grad_norm": 0.27157458662986755,
	"learning_rate": 0.00013429693596444067,
	"loss": 0.0395,
	"step": 610
	},
	{
	"epoch": 8.493150684931507,
	"grad_norm": 0.2969032824039459,
	"learning_rate": 0.00013215291547903006,
	"loss": 0.0406,
	"step": 620
	},
	{
	"epoch": 8.63013698630137,
	"grad_norm": 0.2864357829093933,
	"learning_rate": 0.00012999228056859784,
	"loss": 0.0424,
	"step": 630
	},
	{
	"epoch": 8.767123287671232,
	"grad_norm": 0.25885725021362305,
	"learning_rate": 0.00012781614770131442,
	"loss": 0.0392,
	"step": 640
	},
	{
	"epoch": 8.904109589041095,
	"grad_norm": 0.2456735372543335,
	"learning_rate": 0.00012562564135363313,
	"loss": 0.0415,
	"step": 650
	},
	{
	"epoch": 9.04109589041096,
	"grad_norm": 0.41431066393852234,
	"learning_rate": 0.0001234218934292376,
	"loss": 0.0407,
	"step": 660
	},
	{
	"epoch": 9.178082191780822,
	"grad_norm": 0.260213702917099,
	"learning_rate": 0.00012120604267415172,
	"loss": 0.0393,
	"step": 670
	},
	{
	"epoch": 9.315068493150685,
	"grad_norm": 0.3395901322364807,
	"learning_rate": 0.00011897923408831346,
	"loss": 0.035,
	"step": 680
	},
	{
	"epoch": 9.452054794520548,
	"grad_norm": 0.3405311405658722,
	"learning_rate": 0.0001167426183339174,
	"loss": 0.0342,
	"step": 690
	},
	{
	"epoch": 9.58904109589041,
	"grad_norm": 0.20802819728851318,
	"learning_rate": 0.00011449735114083127,
	"loss": 0.0347,
	"step": 700
	},
	{
	"epoch": 9.726027397260275,
	"grad_norm": 0.5094506144523621,
	"learning_rate": 0.00011224459270939384,
	"loss": 0.0373,
	"step": 710
	},
	{
	"epoch": 9.863013698630137,
	"grad_norm": 0.21799403429031372,
	"learning_rate": 0.000109985507110903,
	"loss": 0.0392,
	"step": 720
	},
	{
	"epoch": 10.0,
	"grad_norm": 0.28433603048324585,
	"learning_rate": 0.00010772126168610325,
	"loss": 0.0373,
	"step": 730
	},
	{
	"epoch": 10.136986301369863,
	"grad_norm": 0.3425813913345337,
	"learning_rate": 0.00010545302644198405,
	"loss": 0.0385,
	"step": 740
	},
	{
	"epoch": 10.273972602739725,
	"grad_norm": 0.2662697434425354,
	"learning_rate": 0.00010318197344720018,
	"loss": 0.0347,
	"step": 750
	},
	{
	"epoch": 10.41095890410959,
	"grad_norm": 0.2841816842556,
	"learning_rate": 0.0001009092762264271,
	"loss": 0.04,
	"step": 760
	},
	{
	"epoch": 10.547945205479452,
	"grad_norm": 0.2933363914489746,
	"learning_rate": 9.863610915396365e-05,
	"loss": 0.0363,
	"step": 770
	},
	{
	"epoch": 10.684931506849315,
	"grad_norm": 0.20692330598831177,
	"learning_rate": 9.63636468468959e-05,
	"loss": 0.0361,
	"step": 780
	},
	{
	"epoch": 10.821917808219178,
	"grad_norm": 0.24741721153259277,
	"learning_rate": 9.409306355813529e-05,
	"loss": 0.0341,
	"step": 790
	},
	{
	"epoch": 10.95890410958904,
	"grad_norm": 0.1948077529668808,
	"learning_rate": 9.18255325696454e-05,
	"loss": 0.0349,
	"step": 800
	},
	{
	"epoch": 11.095890410958905,
	"grad_norm": 0.16165360808372498,
	"learning_rate": 8.956222558616998e-05,
	"loss": 0.0318,
	"step": 810
	},
	{
	"epoch": 11.232876712328768,
	"grad_norm": 0.25702184438705444,
	"learning_rate": 8.730431212977625e-05,
	"loss": 0.0281,
	"step": 820
	},
	{
	"epoch": 11.36986301369863,
	"grad_norm": 0.27587395906448364,
	"learning_rate": 8.505295893552594e-05,
	"loss": 0.0349,
	"step": 830
	},
	{
	"epoch": 11.506849315068493,
	"grad_norm": 0.3140430152416229,
	"learning_rate": 8.280932934858652e-05,
	"loss": 0.0305,
	"step": 840
	},
	{
	"epoch": 11.643835616438356,
	"grad_norm": 0.21165433526039124,
	"learning_rate": 8.05745827230941e-05,
	"loss": 0.0314,
	"step": 850
	},
	{
	"epoch": 11.780821917808218,
	"grad_norm": 0.20445489883422852,
	"learning_rate": 7.834987382307861e-05,
	"loss": 0.0319,
	"step": 860
	},
	{
	"epoch": 11.917808219178083,
	"grad_norm": 0.27832481265068054,
	"learning_rate": 7.613635222576072e-05,
	"loss": 0.0334,
	"step": 870
	},
	{
	"epoch": 12.054794520547945,
	"grad_norm": 0.25728923082351685,
	"learning_rate": 7.393516172752919e-05,
	"loss": 0.033,
	"step": 880
	},
	{
	"epoch": 12.191780821917808,
	"grad_norm": 0.2254086136817932,
	"learning_rate": 7.174743975290513e-05,
	"loss": 0.0346,
	"step": 890
	},
	{
	"epoch": 12.32876712328767,
	"grad_norm": 0.31018713116645813,
	"learning_rate": 6.957431676679896e-05,
	"loss": 0.0329,
	"step": 900
	},
	{
	"epoch": 12.465753424657533,
	"grad_norm": 0.32662343978881836,
	"learning_rate": 6.741691569036338e-05,
	"loss": 0.0342,
	"step": 910
	},
	{
	"epoch": 12.602739726027398,
	"grad_norm": 0.2533169984817505,
	"learning_rate": 6.527635132074493e-05,
	"loss": 0.0264,
	"step": 920
	},
	{
	"epoch": 12.73972602739726,
	"grad_norm": 0.27445635199546814,
	"learning_rate": 6.315372975503285e-05,
	"loss": 0.0281,
	"step": 930
	},
	{
	"epoch": 12.876712328767123,
	"grad_norm": 0.21471256017684937,
	"learning_rate": 6.1050147818704e-05,
	"loss": 0.0321,
	"step": 940
	},
	{
	"epoch": 13.013698630136986,
	"grad_norm": 0.19105984270572662,
	"learning_rate": 5.896669249885851e-05,
	"loss": 0.0273,
	"step": 950
	},
	{
	"epoch": 13.150684931506849,
	"grad_norm": 0.3308360278606415,
	"learning_rate": 5.690444038253935e-05,
	"loss": 0.0343,
	"step": 960
	},
	{
	"epoch": 13.287671232876713,
	"grad_norm": 0.1988590806722641,
	"learning_rate": 5.4864457100425783e-05,
	"loss": 0.028,
	"step": 970
	},
	{
	"epoch": 13.424657534246576,
	"grad_norm": 0.1858794391155243,
	"learning_rate": 5.284779677618841e-05,
	"loss": 0.0273,
	"step": 980
	},
	{
	"epoch": 13.561643835616438,
	"grad_norm": 0.29671627283096313,
	"learning_rate": 5.0855501481790305e-05,
	"loss": 0.0271,
	"step": 990
	},
	{
	"epoch": 13.698630136986301,
	"grad_norm": 0.17693527042865753,
	"learning_rate": 4.8888600699015496e-05,
	"loss": 0.034,
	"step": 1000
	},
	{
	"epoch": 13.835616438356164,
	"grad_norm": 0.31038013100624084,
	"learning_rate": 4.694811078750338e-05,
	"loss": 0.0251,
	"step": 1010
	},
	{
	"epoch": 13.972602739726028,
	"grad_norm": 0.3317829668521881,
	"learning_rate": 4.50350344595635e-05,
	"loss": 0.0334,
	"step": 1020
	},
	{
	"epoch": 14.10958904109589,
	"grad_norm": 0.1818408966064453,
	"learning_rate": 4.315036026204262e-05,
	"loss": 0.0272,
	"step": 1030
	},
	{
	"epoch": 14.246575342465754,
	"grad_norm": 0.2105715572834015,
	"learning_rate": 4.129506206551138e-05,
	"loss": 0.025,
	"step": 1040
	},
	{
	"epoch": 14.383561643835616,
	"grad_norm": 0.18613150715827942,
	"learning_rate": 3.947009856103465e-05,
	"loss": 0.0238,
	"step": 1050
	},
	{
	"epoch": 14.520547945205479,
	"grad_norm": 0.2959461212158203,
	"learning_rate": 3.767641276478563e-05,
	"loss": 0.0249,
	"step": 1060
	},
	{
	"epoch": 14.657534246575342,
	"grad_norm": 0.18495745956897736,
	"learning_rate": 3.591493153075966e-05,
	"loss": 0.0214,
	"step": 1070
	},
	{
	"epoch": 14.794520547945206,
	"grad_norm": 0.1501263529062271,
	"learning_rate": 3.41865650718396e-05,
	"loss": 0.0266,
	"step": 1080
	},
	{
	"epoch": 14.931506849315069,
	"grad_norm": 0.3387095332145691,
	"learning_rate": 3.24922064894601e-05,
	"loss": 0.0268,
	"step": 1090
	},
	{
	"epoch": 15.068493150684931,
	"grad_norm": 0.23434942960739136,
	"learning_rate": 3.083273131211382e-05,
	"loss": 0.0272,
	"step": 1100
	},
	{
	"epoch": 15.205479452054794,
	"grad_norm": 0.163187175989151,
	"learning_rate": 2.920899704293849e-05,
	"loss": 0.0232,
	"step": 1110
	},
	{
	"epoch": 15.342465753424657,
	"grad_norm": 0.20000265538692474,
	"learning_rate": 2.762184271661785e-05,
	"loss": 0.0261,
	"step": 1120
	},
	{
	"epoch": 15.479452054794521,
	"grad_norm": 0.18943333625793457,
	"learning_rate": 2.6072088465826038e-05,
	"loss": 0.0246,
	"step": 1130
	},
	{
	"epoch": 15.616438356164384,
	"grad_norm": 0.2833252251148224,
	"learning_rate": 2.4560535097439108e-05,
	"loss": 0.0253,
	"step": 1140
	},
	{
	"epoch": 15.753424657534246,
	"grad_norm": 0.1302843540906906,
	"learning_rate": 2.308796367873296e-05,
	"loss": 0.0246,
	"step": 1150
	},
	{
	"epoch": 15.89041095890411,
	"grad_norm": 0.16615238785743713,
	"learning_rate": 2.165513513378121e-05,
	"loss": 0.0254,
	"step": 1160
	},
	{
	"epoch": 16.027397260273972,
	"grad_norm": 0.17113815248012543,
	"learning_rate": 2.0262789850261798e-05,
	"loss": 0.0288,
	"step": 1170
	},
	{
	"epoch": 16.164383561643834,
	"grad_norm": 0.21394069492816925,
	"learning_rate": 1.8911647296875147e-05,
	"loss": 0.025,
	"step": 1180
	},
	{
	"epoch": 16.301369863013697,
	"grad_norm": 0.2763649523258209,
	"learning_rate": 1.7602405651572275e-05,
	"loss": 0.0219,
	"step": 1190
	},
	{
	"epoch": 16.438356164383563,
	"grad_norm": 0.13925646245479584,
	"learning_rate": 1.6335741440784035e-05,
	"loss": 0.0217,
	"step": 1200
	},
	{
	"epoch": 16.575342465753426,
	"grad_norm": 0.20826192200183868,
	"learning_rate": 1.511230918983867e-05,
	"loss": 0.023,
	"step": 1210
	},
	{
	"epoch": 16.71232876712329,
	"grad_norm": 0.2256271094083786,
	"learning_rate": 1.3932741084747913e-05,
	"loss": 0.023,
	"step": 1220
	},
	{
	"epoch": 16.84931506849315,
	"grad_norm": 0.27016547322273254,
	"learning_rate": 1.2797646645536566e-05,
	"loss": 0.0211,
	"step": 1230
	},
	{
	"epoch": 16.986301369863014,
	"grad_norm": 0.26627489924430847,
	"learning_rate": 1.1707612411284253e-05,
	"loss": 0.0235,
	"step": 1240
	},
	{
	"epoch": 17.123287671232877,
	"grad_norm": 0.18498767912387848,
	"learning_rate": 1.0663201637042252e-05,
	"loss": 0.022,
	"step": 1250
	},
	{
	"epoch": 17.26027397260274,
	"grad_norm": 0.23852607607841492,
	"learning_rate": 9.664954002781745e-06,
	"loss": 0.0228,
	"step": 1260
	},
	{
	"epoch": 17.397260273972602,
	"grad_norm": 0.15411531925201416,
	"learning_rate": 8.713385334524283e-06,
	"loss": 0.0198,
	"step": 1270
	},
	{
	"epoch": 17.534246575342465,
	"grad_norm": 0.25403866171836853,
	"learning_rate": 7.808987337798158e-06,
	"loss": 0.0257,
	"step": 1280
	},
	{
	"epoch": 17.671232876712327,
	"grad_norm": 0.14403975009918213,
	"learning_rate": 6.952227343558671e-06,
	"loss": 0.0215,
	"step": 1290
	},
	{
	"epoch": 17.80821917808219,
	"grad_norm": 0.188527911901474,
	"learning_rate": 6.143548066703475e-06,
	"loss": 0.0224,
	"step": 1300
	},
	{
	"epoch": 17.945205479452056,
	"grad_norm": 0.1309424489736557,
	"learning_rate": 5.383367377307857e-06,
	"loss": 0.0215,
	"step": 1310
	},
	{
	"epoch": 18.08219178082192,
	"grad_norm": 0.11233002692461014,
	"learning_rate": 4.672078084698095e-06,
	"loss": 0.0211,
	"step": 1320
	},
	{
	"epoch": 18.21917808219178,
	"grad_norm": 0.22869743406772614,
	"learning_rate": 4.010047734474454e-06,
	"loss": 0.0215,
	"step": 1330
	},
	{
	"epoch": 18.356164383561644,
	"grad_norm": 0.11979719996452332,
	"learning_rate": 3.397618418588877e-06,
	"loss": 0.0273,
	"step": 1340
	},
	{
	"epoch": 18.493150684931507,
	"grad_norm": 0.2112375795841217,
	"learning_rate": 2.8351065985751766e-06,
	"loss": 0.0228,
	"step": 1350
	},
	{
	"epoch": 18.63013698630137,
	"grad_norm": 0.14134034514427185,
	"learning_rate": 2.322802942023461e-06,
	"loss": 0.0247,
	"step": 1360
	},
	{
	"epoch": 18.767123287671232,
	"grad_norm": 0.09884881973266602,
	"learning_rate": 1.8609721723830132e-06,
	"loss": 0.0196,
	"step": 1370
	},
	{
	"epoch": 18.904109589041095,
	"grad_norm": 0.14044946432113647,
	"learning_rate": 1.4498529321713584e-06,
	"loss": 0.0198,
	"step": 1380
	},
	{
	"epoch": 19.041095890410958,
	"grad_norm": 0.13853876292705536,
	"learning_rate": 1.0896576596600705e-06,
	"loss": 0.0182,
	"step": 1390
	},
	{
	"epoch": 19.17808219178082,
	"grad_norm": 0.1654110848903656,
	"learning_rate": 7.80572479101327e-07,
	"loss": 0.0229,
	"step": 1400
	},
	{
	"epoch": 19.315068493150687,
	"grad_norm": 0.15151838958263397,
	"learning_rate": 5.227571045515633e-07,
	"loss": 0.0202,
	"step": 1410
	},
	{
	"epoch": 19.45205479452055,
	"grad_norm": 0.2258201688528061,
	"learning_rate": 3.163447573422351e-07,
	"loss": 0.0197,
	"step": 1420
	},
	{
	"epoch": 19.589041095890412,
	"grad_norm": 0.24640779197216034,
	"learning_rate": 1.614420972401165e-07,
	"loss": 0.0187,
	"step": 1430
	},
	{
	"epoch": 19.726027397260275,
	"grad_norm": 0.21181590855121613,
	"learning_rate": 5.812916733284324e-08,
	"loss": 0.0198,
	"step": 1440
	},
	{
	"epoch": 19.863013698630137,
	"grad_norm": 0.14787183701992035,
	"learning_rate": 6.459352668164442e-09,
	"loss": 0.0186,
	"step": 1450
	},
	{
	"epoch": 19.931506849315067,
	"step": 1455,
	"total_flos": 1.1504025698630573e+17,
	"train_loss": 0.05927258820058554,
	"train_runtime": 1048.7401,
	"train_samples_per_second": 88.792,
	"train_steps_per_second": 1.387
	}
	],
	"logging_steps": 10,
	"max_steps": 1455,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 20,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.1504025698630573e+17,
	"train_batch_size": 64,
	"trial_name": null,
	"trial_params": null
	}