youssefkhalil320's picture
Upload folder using huggingface_hub
670e06f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.629948364888124,
"eval_steps": 5000,
"global_step": 8500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1721170395869191,
"grad_norm": 134.16664123535156,
"learning_rate": 2.224770642201835e-06,
"loss": 10.8697,
"step": 100
},
{
"epoch": 0.3442340791738382,
"grad_norm": 69.6041259765625,
"learning_rate": 4.5183486238532115e-06,
"loss": 9.1125,
"step": 200
},
{
"epoch": 0.5163511187607573,
"grad_norm": 103.4394760131836,
"learning_rate": 6.8119266055045875e-06,
"loss": 6.8873,
"step": 300
},
{
"epoch": 0.6884681583476764,
"grad_norm": 32.237342834472656,
"learning_rate": 9.08256880733945e-06,
"loss": 3.1124,
"step": 400
},
{
"epoch": 0.8605851979345955,
"grad_norm": 13.523033142089844,
"learning_rate": 1.1376146788990828e-05,
"loss": 1.0882,
"step": 500
},
{
"epoch": 1.0327022375215147,
"grad_norm": 56.13002395629883,
"learning_rate": 1.3669724770642203e-05,
"loss": 0.869,
"step": 600
},
{
"epoch": 1.2048192771084336,
"grad_norm": 4.41276741027832,
"learning_rate": 1.5963302752293578e-05,
"loss": 0.6952,
"step": 700
},
{
"epoch": 1.3769363166953528,
"grad_norm": 1.8771318197250366,
"learning_rate": 1.8256880733944955e-05,
"loss": 0.5522,
"step": 800
},
{
"epoch": 1.549053356282272,
"grad_norm": 5.131401062011719,
"learning_rate": 1.9938798928981258e-05,
"loss": 0.5184,
"step": 900
},
{
"epoch": 1.721170395869191,
"grad_norm": 3.2520999908447266,
"learning_rate": 1.9683794466403164e-05,
"loss": 0.3996,
"step": 1000
},
{
"epoch": 1.8932874354561102,
"grad_norm": 1.1916402578353882,
"learning_rate": 1.9428790003825067e-05,
"loss": 0.6316,
"step": 1100
},
{
"epoch": 2.0654044750430294,
"grad_norm": 3.3565962314605713,
"learning_rate": 1.9173785541246974e-05,
"loss": 0.5352,
"step": 1200
},
{
"epoch": 2.2375215146299485,
"grad_norm": 2.182133436203003,
"learning_rate": 1.8918781078668877e-05,
"loss": 0.3731,
"step": 1300
},
{
"epoch": 2.4096385542168672,
"grad_norm": 3.537748098373413,
"learning_rate": 1.8663776616090783e-05,
"loss": 0.3376,
"step": 1400
},
{
"epoch": 2.581755593803787,
"grad_norm": 2.066549777984619,
"learning_rate": 1.840877215351269e-05,
"loss": 0.597,
"step": 1500
},
{
"epoch": 2.7538726333907055,
"grad_norm": 2.867453098297119,
"learning_rate": 1.8153767690934592e-05,
"loss": 0.5737,
"step": 1600
},
{
"epoch": 2.9259896729776247,
"grad_norm": 0.8096536993980408,
"learning_rate": 1.7898763228356495e-05,
"loss": 0.7107,
"step": 1700
},
{
"epoch": 3.098106712564544,
"grad_norm": 5.293230056762695,
"learning_rate": 1.7643758765778402e-05,
"loss": 0.4356,
"step": 1800
},
{
"epoch": 3.270223752151463,
"grad_norm": 2.1939845085144043,
"learning_rate": 1.7388754303200308e-05,
"loss": 0.5581,
"step": 1900
},
{
"epoch": 3.442340791738382,
"grad_norm": 2.1973116397857666,
"learning_rate": 1.713374984062221e-05,
"loss": 0.2012,
"step": 2000
},
{
"epoch": 3.6144578313253013,
"grad_norm": 1.3364547491073608,
"learning_rate": 1.6878745378044118e-05,
"loss": 0.3906,
"step": 2100
},
{
"epoch": 3.7865748709122204,
"grad_norm": 3.2359094619750977,
"learning_rate": 1.662374091546602e-05,
"loss": 0.5386,
"step": 2200
},
{
"epoch": 3.958691910499139,
"grad_norm": 31.699663162231445,
"learning_rate": 1.6368736452887927e-05,
"loss": 0.2624,
"step": 2300
},
{
"epoch": 4.130808950086059,
"grad_norm": 92.98713684082031,
"learning_rate": 1.611373199030983e-05,
"loss": 0.3573,
"step": 2400
},
{
"epoch": 4.3029259896729775,
"grad_norm": 2.056157350540161,
"learning_rate": 1.5858727527731736e-05,
"loss": 0.4798,
"step": 2500
},
{
"epoch": 4.475043029259897,
"grad_norm": 7.822810649871826,
"learning_rate": 1.5606273109779423e-05,
"loss": 0.2465,
"step": 2600
},
{
"epoch": 4.647160068846816,
"grad_norm": 1.6002038717269897,
"learning_rate": 1.5351268647201326e-05,
"loss": 0.3482,
"step": 2700
},
{
"epoch": 4.8192771084337345,
"grad_norm": 2.061086416244507,
"learning_rate": 1.5096264184623233e-05,
"loss": 0.1915,
"step": 2800
},
{
"epoch": 4.991394148020654,
"grad_norm": 1.1744683980941772,
"learning_rate": 1.4841259722045136e-05,
"loss": 0.4617,
"step": 2900
},
{
"epoch": 5.163511187607573,
"grad_norm": 2.5757875442504883,
"learning_rate": 1.4586255259467042e-05,
"loss": 0.2874,
"step": 3000
},
{
"epoch": 5.335628227194492,
"grad_norm": 8.106232643127441,
"learning_rate": 1.4331250796888947e-05,
"loss": 0.4636,
"step": 3100
},
{
"epoch": 5.507745266781411,
"grad_norm": 2.139594316482544,
"learning_rate": 1.4076246334310853e-05,
"loss": 0.1344,
"step": 3200
},
{
"epoch": 5.679862306368331,
"grad_norm": 8.198427200317383,
"learning_rate": 1.3821241871732756e-05,
"loss": 0.3615,
"step": 3300
},
{
"epoch": 5.851979345955249,
"grad_norm": 0.706113338470459,
"learning_rate": 1.3566237409154661e-05,
"loss": 0.309,
"step": 3400
},
{
"epoch": 6.024096385542169,
"grad_norm": 1.0154913663864136,
"learning_rate": 1.3311232946576567e-05,
"loss": 0.1883,
"step": 3500
},
{
"epoch": 6.196213425129088,
"grad_norm": 27.715837478637695,
"learning_rate": 1.3056228483998472e-05,
"loss": 0.4029,
"step": 3600
},
{
"epoch": 6.368330464716007,
"grad_norm": 3.0514609813690186,
"learning_rate": 1.2801224021420375e-05,
"loss": 0.2082,
"step": 3700
},
{
"epoch": 6.540447504302926,
"grad_norm": 3.8193249702453613,
"learning_rate": 1.2546219558842281e-05,
"loss": 0.1333,
"step": 3800
},
{
"epoch": 6.712564543889846,
"grad_norm": 1.4768047332763672,
"learning_rate": 1.2291215096264186e-05,
"loss": 0.1509,
"step": 3900
},
{
"epoch": 6.884681583476764,
"grad_norm": 1.5106594562530518,
"learning_rate": 1.2036210633686089e-05,
"loss": 0.6264,
"step": 4000
},
{
"epoch": 7.056798623063683,
"grad_norm": 1.1024622917175293,
"learning_rate": 1.1781206171107995e-05,
"loss": 0.2177,
"step": 4100
},
{
"epoch": 7.228915662650603,
"grad_norm": 0.900026798248291,
"learning_rate": 1.15262017085299e-05,
"loss": 0.1957,
"step": 4200
},
{
"epoch": 7.401032702237521,
"grad_norm": 144.5244140625,
"learning_rate": 1.1271197245951807e-05,
"loss": 0.2887,
"step": 4300
},
{
"epoch": 7.573149741824441,
"grad_norm": 4.466265678405762,
"learning_rate": 1.101619278337371e-05,
"loss": 0.2271,
"step": 4400
},
{
"epoch": 7.74526678141136,
"grad_norm": 2.862029790878296,
"learning_rate": 1.0761188320795614e-05,
"loss": 0.3486,
"step": 4500
},
{
"epoch": 7.917383820998279,
"grad_norm": 1.178603172302246,
"learning_rate": 1.050618385821752e-05,
"loss": 0.4429,
"step": 4600
},
{
"epoch": 8.089500860585199,
"grad_norm": 6.430075645446777,
"learning_rate": 1.0251179395639424e-05,
"loss": 0.4398,
"step": 4700
},
{
"epoch": 8.261617900172118,
"grad_norm": 6.42482852935791,
"learning_rate": 9.996174933061328e-06,
"loss": 0.31,
"step": 4800
},
{
"epoch": 8.433734939759036,
"grad_norm": 6.2779622077941895,
"learning_rate": 9.743720515109015e-06,
"loss": 0.2045,
"step": 4900
},
{
"epoch": 8.605851979345955,
"grad_norm": 4.175030708312988,
"learning_rate": 9.48871605253092e-06,
"loss": 0.2583,
"step": 5000
},
{
"epoch": 8.605851979345955,
"eval_loss": 0.23712533712387085,
"eval_runtime": 13.9021,
"eval_samples_per_second": 1335.195,
"eval_steps_per_second": 10.502,
"step": 5000
},
{
"epoch": 8.777969018932874,
"grad_norm": 1.0522035360336304,
"learning_rate": 9.233711589952825e-06,
"loss": 0.2774,
"step": 5100
},
{
"epoch": 8.950086058519794,
"grad_norm": 0.9467515349388123,
"learning_rate": 8.98125717200051e-06,
"loss": 0.1902,
"step": 5200
},
{
"epoch": 9.122203098106713,
"grad_norm": 9.148195266723633,
"learning_rate": 8.726252709422416e-06,
"loss": 0.3058,
"step": 5300
},
{
"epoch": 9.294320137693632,
"grad_norm": 9.301542282104492,
"learning_rate": 8.471248246844321e-06,
"loss": 0.3742,
"step": 5400
},
{
"epoch": 9.46643717728055,
"grad_norm": 17.278079986572266,
"learning_rate": 8.216243784266226e-06,
"loss": 0.2972,
"step": 5500
},
{
"epoch": 9.638554216867469,
"grad_norm": 4.043286323547363,
"learning_rate": 7.96123932168813e-06,
"loss": 0.3084,
"step": 5600
},
{
"epoch": 9.81067125645439,
"grad_norm": 2.227259874343872,
"learning_rate": 7.706234859110035e-06,
"loss": 0.1215,
"step": 5700
},
{
"epoch": 9.982788296041308,
"grad_norm": 0.9134290218353271,
"learning_rate": 7.45123039653194e-06,
"loss": 0.1876,
"step": 5800
},
{
"epoch": 10.154905335628227,
"grad_norm": 1.4163002967834473,
"learning_rate": 7.196225933953844e-06,
"loss": 0.1702,
"step": 5900
},
{
"epoch": 10.327022375215146,
"grad_norm": 1.564228892326355,
"learning_rate": 6.94122147137575e-06,
"loss": 0.2506,
"step": 6000
},
{
"epoch": 10.499139414802066,
"grad_norm": 5.47558069229126,
"learning_rate": 6.686217008797654e-06,
"loss": 0.2852,
"step": 6100
},
{
"epoch": 10.671256454388985,
"grad_norm": 10.801889419555664,
"learning_rate": 6.431212546219559e-06,
"loss": 0.2354,
"step": 6200
},
{
"epoch": 10.843373493975903,
"grad_norm": 1.8754569292068481,
"learning_rate": 6.176208083641464e-06,
"loss": 0.214,
"step": 6300
},
{
"epoch": 11.015490533562822,
"grad_norm": 2.237508773803711,
"learning_rate": 5.9212036210633696e-06,
"loss": 0.3815,
"step": 6400
},
{
"epoch": 11.187607573149743,
"grad_norm": 13.412964820861816,
"learning_rate": 5.666199158485273e-06,
"loss": 0.0803,
"step": 6500
},
{
"epoch": 11.359724612736661,
"grad_norm": 0.996343195438385,
"learning_rate": 5.411194695907179e-06,
"loss": 0.1941,
"step": 6600
},
{
"epoch": 11.53184165232358,
"grad_norm": 64.30641174316406,
"learning_rate": 5.156190233329084e-06,
"loss": 0.1576,
"step": 6700
},
{
"epoch": 11.703958691910499,
"grad_norm": 0.9045078158378601,
"learning_rate": 4.901185770750988e-06,
"loss": 0.2911,
"step": 6800
},
{
"epoch": 11.876075731497417,
"grad_norm": 1.798627495765686,
"learning_rate": 4.646181308172894e-06,
"loss": 0.4913,
"step": 6900
},
{
"epoch": 12.048192771084338,
"grad_norm": 6.165831565856934,
"learning_rate": 4.3911768455947986e-06,
"loss": 0.2759,
"step": 7000
},
{
"epoch": 12.220309810671257,
"grad_norm": 3.460507392883301,
"learning_rate": 4.136172383016703e-06,
"loss": 0.2928,
"step": 7100
},
{
"epoch": 12.392426850258175,
"grad_norm": 5.5960187911987305,
"learning_rate": 3.881167920438608e-06,
"loss": 0.2181,
"step": 7200
},
{
"epoch": 12.564543889845094,
"grad_norm": 1.473883032798767,
"learning_rate": 3.6261634578605126e-06,
"loss": 0.1286,
"step": 7300
},
{
"epoch": 12.736660929432015,
"grad_norm": 10.179828643798828,
"learning_rate": 3.3711589952824173e-06,
"loss": 0.3342,
"step": 7400
},
{
"epoch": 12.908777969018933,
"grad_norm": 3.2593960762023926,
"learning_rate": 3.1187045773301034e-06,
"loss": 0.1577,
"step": 7500
},
{
"epoch": 13.080895008605852,
"grad_norm": 1.169028878211975,
"learning_rate": 2.863700114752008e-06,
"loss": 0.2578,
"step": 7600
},
{
"epoch": 13.25301204819277,
"grad_norm": 1.2204866409301758,
"learning_rate": 2.6086956521739132e-06,
"loss": 0.2844,
"step": 7700
},
{
"epoch": 13.42512908777969,
"grad_norm": 1.1637088060379028,
"learning_rate": 2.353691189595818e-06,
"loss": 0.0917,
"step": 7800
},
{
"epoch": 13.59724612736661,
"grad_norm": 0.6639829277992249,
"learning_rate": 2.098686727017723e-06,
"loss": 0.2617,
"step": 7900
},
{
"epoch": 13.769363166953529,
"grad_norm": 4.155405044555664,
"learning_rate": 1.843682264439628e-06,
"loss": 0.3021,
"step": 8000
},
{
"epoch": 13.941480206540447,
"grad_norm": 1.9663244485855103,
"learning_rate": 1.5886778018615326e-06,
"loss": 0.1036,
"step": 8100
},
{
"epoch": 14.113597246127366,
"grad_norm": 32.85494613647461,
"learning_rate": 1.3336733392834375e-06,
"loss": 0.5471,
"step": 8200
},
{
"epoch": 14.285714285714286,
"grad_norm": 0.6067169904708862,
"learning_rate": 1.0786688767053424e-06,
"loss": 0.2395,
"step": 8300
},
{
"epoch": 14.457831325301205,
"grad_norm": 0.9747382998466492,
"learning_rate": 8.236644141272474e-07,
"loss": 0.2664,
"step": 8400
},
{
"epoch": 14.629948364888124,
"grad_norm": 21.624757766723633,
"learning_rate": 5.686599515491522e-07,
"loss": 0.2697,
"step": 8500
}
],
"logging_steps": 100,
"max_steps": 8715,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}