{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.1737089201877935,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0011737089201877935, "grad_norm": 0.27773135900497437, "learning_rate": 4e-05, "loss": 1.1957, "step": 1 },
    { "epoch": 0.002347417840375587, "grad_norm": 0.26547771692276, "learning_rate": 8e-05, "loss": 1.1284, "step": 2 },
    { "epoch": 0.0035211267605633804, "grad_norm": 0.236787810921669, "learning_rate": 0.00012, "loss": 1.1823, "step": 3 },
    { "epoch": 0.004694835680751174, "grad_norm": 0.2459038347005844, "learning_rate": 0.00016, "loss": 1.1409, "step": 4 },
    { "epoch": 0.005868544600938967, "grad_norm": 0.2526487410068512, "learning_rate": 0.0002, "loss": 1.12, "step": 5 },
    { "epoch": 0.007042253521126761, "grad_norm": 0.2795103192329407, "learning_rate": 0.00019976387249114524, "loss": 1.1579, "step": 6 },
    { "epoch": 0.008215962441314555, "grad_norm": 0.2876183092594147, "learning_rate": 0.00019952774498229045, "loss": 1.1211, "step": 7 },
    { "epoch": 0.009389671361502348, "grad_norm": 0.3014296293258667, "learning_rate": 0.00019929161747343565, "loss": 1.1118, "step": 8 },
    { "epoch": 0.01056338028169014, "grad_norm": 0.29106494784355164, "learning_rate": 0.00019905548996458088, "loss": 1.1787, "step": 9 },
    { "epoch": 0.011737089201877934, "grad_norm": 0.3211474120616913, "learning_rate": 0.00019881936245572609, "loss": 1.1004, "step": 10 },
    { "epoch": 0.012910798122065728, "grad_norm": 0.3358176350593567, "learning_rate": 0.00019858323494687132, "loss": 1.1099, "step": 11 },
    { "epoch": 0.014084507042253521, "grad_norm": 0.3236922323703766, "learning_rate": 0.00019834710743801655, "loss": 1.048, "step": 12 },
    { "epoch": 0.015258215962441314, "grad_norm": 0.31388312578201294, "learning_rate": 0.00019811097992916175, "loss": 1.0532, "step": 13 },
    { "epoch": 0.01643192488262911, "grad_norm": 0.320402055978775, "learning_rate": 0.00019787485242030696, "loss": 1.0757, "step": 14 },
    { "epoch": 0.017605633802816902, "grad_norm": 0.32999494671821594, "learning_rate": 0.0001976387249114522, "loss": 1.122, "step": 15 },
    { "epoch": 0.018779342723004695, "grad_norm": 0.30936214327812195, "learning_rate": 0.00019740259740259742, "loss": 1.1156, "step": 16 },
    { "epoch": 0.01995305164319249, "grad_norm": 0.2863931953907013, "learning_rate": 0.00019716646989374263, "loss": 1.0414, "step": 17 },
    { "epoch": 0.02112676056338028, "grad_norm": 0.29143351316452026, "learning_rate": 0.00019693034238488786, "loss": 1.0379, "step": 18 },
    { "epoch": 0.022300469483568074, "grad_norm": 0.28874626755714417, "learning_rate": 0.0001966942148760331, "loss": 1.0388, "step": 19 },
    { "epoch": 0.023474178403755867, "grad_norm": 0.30588293075561523, "learning_rate": 0.00019645808736717827, "loss": 1.0515, "step": 20 },
    { "epoch": 0.02464788732394366, "grad_norm": 0.29231536388397217, "learning_rate": 0.0001962219598583235, "loss": 1.0472, "step": 21 },
    { "epoch": 0.025821596244131457, "grad_norm": 0.2783581614494324, "learning_rate": 0.00019598583234946873, "loss": 1.0608, "step": 22 },
    { "epoch": 0.02699530516431925, "grad_norm": 0.29816293716430664, "learning_rate": 0.00019574970484061393, "loss": 1.0986, "step": 23 },
    { "epoch": 0.028169014084507043, "grad_norm": 0.27919578552246094, "learning_rate": 0.00019551357733175916, "loss": 1.0265, "step": 24 },
    { "epoch": 0.029342723004694836, "grad_norm": 0.3144524097442627, "learning_rate": 0.00019527744982290437, "loss": 1.0699, "step": 25 },
    { "epoch": 0.03051643192488263, "grad_norm": 0.3090282678604126, "learning_rate": 0.0001950413223140496, "loss": 1.0601, "step": 26 },
    { "epoch": 0.03169014084507042, "grad_norm": 0.30304697155952454, "learning_rate": 0.0001948051948051948, "loss": 1.0926, "step": 27 },
    { "epoch": 0.03286384976525822, "grad_norm": 0.29015883803367615, "learning_rate": 0.00019456906729634004, "loss": 1.0, "step": 28 },
    { "epoch": 0.03403755868544601, "grad_norm": 0.29359501600265503, "learning_rate": 0.00019433293978748527, "loss": 0.988, "step": 29 },
    { "epoch": 0.035211267605633804, "grad_norm": 0.2772333323955536, "learning_rate": 0.00019409681227863047, "loss": 0.9758, "step": 30 },
    { "epoch": 0.036384976525821594, "grad_norm": 0.2761421799659729, "learning_rate": 0.00019386068476977568, "loss": 0.9926, "step": 31 },
    { "epoch": 0.03755868544600939, "grad_norm": NaN, "learning_rate": 0.00019386068476977568, "loss": 1.0944, "step": 32 },
    { "epoch": 0.03873239436619718, "grad_norm": 0.2766799330711365, "learning_rate": 0.0001936245572609209, "loss": 0.9813, "step": 33 },
    { "epoch": 0.03990610328638498, "grad_norm": 0.28922533988952637, "learning_rate": 0.0001933884297520661, "loss": 0.9839, "step": 34 },
    { "epoch": 0.04107981220657277, "grad_norm": 0.28271371126174927, "learning_rate": 0.00019315230224321134, "loss": 1.0125, "step": 35 },
    { "epoch": 0.04225352112676056, "grad_norm": 0.2955509424209595, "learning_rate": 0.00019291617473435658, "loss": 1.0049, "step": 36 },
    { "epoch": 0.04342723004694836, "grad_norm": 0.2909109592437744, "learning_rate": 0.00019268004722550178, "loss": 1.0015, "step": 37 },
    { "epoch": 0.04460093896713615, "grad_norm": 0.29657021164894104, "learning_rate": 0.00019244391971664698, "loss": 1.0107, "step": 38 },
    { "epoch": 0.045774647887323945, "grad_norm": 0.29010507464408875, "learning_rate": 0.00019220779220779222, "loss": 0.9918, "step": 39 },
    { "epoch": 0.046948356807511735, "grad_norm": 0.2906627058982849, "learning_rate": 0.00019197166469893745, "loss": 0.9843, "step": 40 },
    { "epoch": 0.04812206572769953, "grad_norm": 0.2919193208217621, "learning_rate": 0.00019173553719008265, "loss": 0.9889, "step": 41 },
    { "epoch": 0.04929577464788732, "grad_norm": 0.3219091296195984, "learning_rate": 0.00019149940968122788, "loss": 0.9979, "step": 42 },
    { "epoch": 0.05046948356807512, "grad_norm": 0.29512301087379456, "learning_rate": 0.0001912632821723731, "loss": 0.989, "step": 43 },
    { "epoch": 0.051643192488262914, "grad_norm": 0.3190619647502899, "learning_rate": 0.0001910271546635183, "loss": 0.9563, "step": 44 },
    { "epoch": 0.0528169014084507, "grad_norm": 0.310253381729126, "learning_rate": 0.00019079102715466352, "loss": 1.037, "step": 45 },
    { "epoch": 0.0539906103286385, "grad_norm": 0.3140093684196472, "learning_rate": 0.00019055489964580876, "loss": 0.9687, "step": 46 },
    { "epoch": 0.05516431924882629, "grad_norm": 0.2816644310951233, "learning_rate": 0.00019031877213695396, "loss": 0.9372, "step": 47 },
    { "epoch": 0.056338028169014086, "grad_norm": 0.3012441396713257, "learning_rate": 0.0001900826446280992, "loss": 0.9968, "step": 48 },
    { "epoch": 0.057511737089201875, "grad_norm": 0.29789185523986816, "learning_rate": 0.0001898465171192444, "loss": 0.9143, "step": 49 },
    { "epoch": 0.05868544600938967, "grad_norm": 0.29454007744789124, "learning_rate": 0.00018961038961038963, "loss": 0.9837, "step": 50 },
    { "epoch": 0.05985915492957746, "grad_norm": 0.321218341588974, "learning_rate": 0.00018937426210153483, "loss": 1.0135, "step": 51 },
    { "epoch": 0.06103286384976526, "grad_norm": 0.30039164423942566, "learning_rate": 0.00018913813459268006, "loss": 0.9639, "step": 52 },
    { "epoch": 0.062206572769953054, "grad_norm": 0.3052615225315094, "learning_rate": 0.0001889020070838253, "loss": 0.9401, "step": 53 },
    { "epoch": 0.06338028169014084, "grad_norm": 0.3177138864994049, "learning_rate": 0.00018866587957497047, "loss": 0.9626, "step": 54 },
    { "epoch": 0.06455399061032864, "grad_norm": 0.3098903298377991, "learning_rate": 0.0001884297520661157, "loss": 0.9535, "step": 55 },
    { "epoch": 0.06572769953051644, "grad_norm": 0.33165299892425537, "learning_rate": 0.00018819362455726094, "loss": 1.0475, "step": 56 },
    { "epoch": 0.06690140845070422, "grad_norm": 0.3054540455341339, "learning_rate": 0.00018795749704840614, "loss": 0.988, "step": 57 },
    { "epoch": 0.06807511737089202, "grad_norm": 0.3412969708442688, "learning_rate": 0.00018772136953955137, "loss": 0.9531, "step": 58 },
    { "epoch": 0.06924882629107981, "grad_norm": 0.3173505663871765, "learning_rate": 0.0001874852420306966, "loss": 1.0037, "step": 59 },
    { "epoch": 0.07042253521126761, "grad_norm": 0.29377281665802, "learning_rate": 0.0001872491145218418, "loss": 0.9205, "step": 60 },
    { "epoch": 0.0715962441314554, "grad_norm": 0.2970433831214905, "learning_rate": 0.000187012987012987, "loss": 0.8902, "step": 61 },
    { "epoch": 0.07276995305164319, "grad_norm": 0.3081493675708771, "learning_rate": 0.00018677685950413224, "loss": 0.9498, "step": 62 },
    { "epoch": 0.07394366197183098, "grad_norm": 0.31438371539115906, "learning_rate": 0.00018654073199527747, "loss": 0.9406, "step": 63 },
    { "epoch": 0.07511737089201878, "grad_norm": 0.29640915989875793, "learning_rate": 0.00018630460448642268, "loss": 0.8948, "step": 64 },
    { "epoch": 0.07629107981220658, "grad_norm": 0.33342233300209045, "learning_rate": 0.00018606847697756788, "loss": 0.941, "step": 65 },
    { "epoch": 0.07746478873239436, "grad_norm": 0.31546634435653687, "learning_rate": 0.00018583234946871312, "loss": 0.9392, "step": 66 },
    { "epoch": 0.07863849765258216, "grad_norm": 0.31528937816619873, "learning_rate": 0.00018559622195985832, "loss": 0.9293, "step": 67 },
    { "epoch": 0.07981220657276995, "grad_norm": 0.33473101258277893, "learning_rate": 0.00018536009445100355, "loss": 0.9214, "step": 68 },
    { "epoch": 0.08098591549295775, "grad_norm": 0.6588060259819031, "learning_rate": 0.00018512396694214878, "loss": 0.944, "step": 69 },
    { "epoch": 0.08215962441314555, "grad_norm": 0.30120280385017395, "learning_rate": 0.000184887839433294, "loss": 0.9171, "step": 70 },
    { "epoch": 0.08333333333333333, "grad_norm": 0.3417011499404907, "learning_rate": 0.0001846517119244392, "loss": 0.9382, "step": 71 },
    { "epoch": 0.08450704225352113, "grad_norm": 0.3202987313270569, "learning_rate": 0.00018441558441558442, "loss": 0.931, "step": 72 },
    { "epoch": 0.08568075117370892, "grad_norm": 0.3390517234802246, "learning_rate": 0.00018417945690672965, "loss": 0.9218, "step": 73 },
    { "epoch": 0.08685446009389672, "grad_norm": 0.32109472155570984, "learning_rate": 0.00018394332939787486, "loss": 0.9226, "step": 74 },
    { "epoch": 0.0880281690140845, "grad_norm": 0.3435365855693817, "learning_rate": 0.0001837072018890201, "loss": 0.9402, "step": 75 },
    { "epoch": 0.0892018779342723, "grad_norm": 0.3335697054862976, "learning_rate": 0.00018347107438016532, "loss": 0.9385, "step": 76 },
    { "epoch": 0.0903755868544601, "grad_norm": 0.32050758600234985, "learning_rate": 0.0001832349468713105, "loss": 0.8992, "step": 77 },
    { "epoch": 0.09154929577464789, "grad_norm": 0.32620421051979065, "learning_rate": 0.00018299881936245573, "loss": 0.9476, "step": 78 },
    { "epoch": 0.09272300469483569, "grad_norm": 0.33306750655174255, "learning_rate": 0.00018276269185360096, "loss": 0.9458, "step": 79 },
    { "epoch": 0.09389671361502347, "grad_norm": 0.3500649034976959, "learning_rate": 0.00018252656434474617, "loss": 0.9612, "step": 80 },
    { "epoch": 0.09507042253521127, "grad_norm": 0.3186359405517578, "learning_rate": 0.0001822904368358914, "loss": 0.9527, "step": 81 },
    { "epoch": 0.09624413145539906, "grad_norm": 0.3317716717720032, "learning_rate": 0.0001820543093270366, "loss": 0.9648, "step": 82 },
    { "epoch": 0.09741784037558686, "grad_norm": 0.3196907639503479, "learning_rate": 0.00018181818181818183, "loss": 0.9643, "step": 83 },
    { "epoch": 0.09859154929577464, "grad_norm": 0.3195818066596985, "learning_rate": 0.00018158205430932704, "loss": 0.9121, "step": 84 },
    { "epoch": 0.09976525821596244, "grad_norm": 0.33151793479919434, "learning_rate": 0.00018134592680047227, "loss": 0.9051, "step": 85 },
    { "epoch": 0.10093896713615023, "grad_norm": 0.3110804259777069, "learning_rate": 0.00018110979929161747, "loss": 0.9241, "step": 86 },
    { "epoch": 0.10211267605633803, "grad_norm": 0.34278568625450134, "learning_rate": 0.0001808736717827627, "loss": 0.9634, "step": 87 },
    { "epoch": 0.10328638497652583, "grad_norm": 0.34013500809669495, "learning_rate": 0.0001806375442739079, "loss": 0.8822, "step": 88 },
    { "epoch": 0.10446009389671361, "grad_norm": 0.3449755012989044, "learning_rate": 0.00018040141676505314, "loss": 0.969, "step": 89 },
    { "epoch": 0.1056338028169014, "grad_norm": 0.3166862726211548, "learning_rate": 0.00018016528925619835, "loss": 0.885, "step": 90 },
    { "epoch": 0.1068075117370892, "grad_norm": 0.3260084092617035, "learning_rate": 0.00017992916174734358, "loss": 0.8908, "step": 91 },
    { "epoch": 0.107981220657277, "grad_norm": 0.32791605591773987, "learning_rate": 0.0001796930342384888, "loss": 0.8822, "step": 92 },
    { "epoch": 0.10915492957746478, "grad_norm": 0.31909653544425964, "learning_rate": 0.000179456906729634, "loss": 0.8463, "step": 93 },
    { "epoch": 0.11032863849765258, "grad_norm": 0.3413308262825012, "learning_rate": 0.00017922077922077922, "loss": 0.9232, "step": 94 },
    { "epoch": 0.11150234741784038, "grad_norm": 0.32644134759902954, "learning_rate": 0.00017898465171192445, "loss": 0.9113, "step": 95 },
    { "epoch": 0.11267605633802817, "grad_norm": 0.33090126514434814, "learning_rate": 0.00017874852420306965, "loss": 0.9286, "step": 96 },
    { "epoch": 0.11384976525821597, "grad_norm": 0.37200361490249634, "learning_rate": 0.00017851239669421489, "loss": 0.9239, "step": 97 },
    { "epoch": 0.11502347417840375, "grad_norm": 0.3274000585079193, "learning_rate": 0.00017827626918536012, "loss": 0.9038, "step": 98 },
    { "epoch": 0.11619718309859155, "grad_norm": 0.3768482506275177, "learning_rate": 0.00017804014167650532, "loss": 0.8558, "step": 99 },
    { "epoch": 0.11737089201877934, "grad_norm": 0.32970595359802246, "learning_rate": 0.00017780401416765053, "loss": 0.9057, "step": 100 },
    { "epoch": 0.11854460093896714, "grad_norm": 0.37230944633483887, "learning_rate": 0.00017756788665879576, "loss": 0.9211, "step": 101 },
    { "epoch": 0.11971830985915492, "grad_norm": 0.352201372385025, "learning_rate": 0.000177331759149941, "loss": 0.9497, "step": 102 },
    { "epoch": 0.12089201877934272, "grad_norm": 0.363364577293396, "learning_rate": 0.0001770956316410862, "loss": 0.9535, "step": 103 },
    { "epoch": 0.12206572769953052, "grad_norm": 0.3388724625110626, "learning_rate": 0.00017685950413223143, "loss": 0.8908, "step": 104 },
    { "epoch": 0.12323943661971831, "grad_norm": 0.34684258699417114, "learning_rate": 0.00017662337662337663, "loss": 0.8981, "step": 105 },
    { "epoch": 0.12441314553990611, "grad_norm": 0.31892621517181396, "learning_rate": 0.00017638724911452183, "loss": 0.8461, "step": 106 },
    { "epoch": 0.1255868544600939, "grad_norm": 0.32913845777511597, "learning_rate": 0.00017615112160566707, "loss": 0.9087, "step": 107 },
    { "epoch": 0.1267605633802817, "grad_norm": 0.3695410490036011, "learning_rate": 0.0001759149940968123, "loss": 0.8899, "step": 108 },
    { "epoch": 0.12793427230046947, "grad_norm": 0.3455798923969269, "learning_rate": 0.0001756788665879575, "loss": 0.9045, "step": 109 },
    { "epoch": 0.12910798122065728, "grad_norm": 0.3612275719642639, "learning_rate": 0.0001754427390791027, "loss": 0.8861, "step": 110 },
    { "epoch": 0.13028169014084506, "grad_norm": 0.4106651544570923, "learning_rate": 0.00017520661157024794, "loss": 0.9152, "step": 111 },
    { "epoch": 0.13145539906103287, "grad_norm": 0.3604993224143982, "learning_rate": 0.00017497048406139317, "loss": 0.9141, "step": 112 },
    { "epoch": 0.13262910798122066, "grad_norm": 0.3496919870376587, "learning_rate": 0.00017473435655253837, "loss": 0.9061, "step": 113 },
    { "epoch": 0.13380281690140844, "grad_norm": 0.33643972873687744, "learning_rate": 0.0001744982290436836, "loss": 0.8877, "step": 114 },
    { "epoch": 0.13497652582159625, "grad_norm": 0.33064204454421997, "learning_rate": 0.00017426210153482884, "loss": 0.8967, "step": 115 },
    { "epoch": 0.13615023474178403, "grad_norm": 0.37868356704711914, "learning_rate": 0.00017402597402597401, "loss": 0.8957, "step": 116 },
    { "epoch": 0.13732394366197184, "grad_norm": 0.34379109740257263, "learning_rate": 0.00017378984651711925, "loss": 0.9332, "step": 117 },
    { "epoch": 0.13849765258215962, "grad_norm": 0.37193912267684937, "learning_rate": 0.00017355371900826448, "loss": 0.9513, "step": 118 },
    { "epoch": 0.1396713615023474, "grad_norm": 0.33701232075691223, "learning_rate": 0.00017331759149940968, "loss": 0.8946, "step": 119 },
    { "epoch": 0.14084507042253522, "grad_norm": 0.35765206813812256, "learning_rate": 0.0001730814639905549, "loss": 0.8931, "step": 120 },
    { "epoch": 0.142018779342723, "grad_norm": 0.3511311411857605, "learning_rate": 0.00017284533648170012, "loss": 0.9042, "step": 121 },
    { "epoch": 0.1431924882629108, "grad_norm": 0.33516445755958557, "learning_rate": 0.00017260920897284535, "loss": 0.8564, "step": 122 },
    { "epoch": 0.1443661971830986, "grad_norm": 0.385959267616272, "learning_rate": 0.00017237308146399055, "loss": 0.963, "step": 123 },
    { "epoch": 0.14553990610328638, "grad_norm": 0.34608641266822815, "learning_rate": 0.00017213695395513578, "loss": 0.8666, "step": 124 },
    { "epoch": 0.1467136150234742, "grad_norm": 0.3705556392669678, "learning_rate": 0.00017190082644628102, "loss": 0.7783, "step": 125 },
    { "epoch": 0.14788732394366197, "grad_norm": 0.3213210701942444, "learning_rate": 0.00017166469893742622, "loss": 0.8428, "step": 126 },
    { "epoch": 0.14906103286384975, "grad_norm": 0.3903498351573944, "learning_rate": 0.00017142857142857143, "loss": 0.8418, "step": 127 },
    { "epoch": 0.15023474178403756, "grad_norm": 0.3556365668773651, "learning_rate": 0.00017119244391971666, "loss": 0.8612, "step": 128 },
    { "epoch": 0.15140845070422534, "grad_norm": 0.3734995424747467, "learning_rate": 0.00017095631641086186, "loss": 0.8845, "step": 129 },
    { "epoch": 0.15258215962441316, "grad_norm": 0.33735260367393494, "learning_rate": 0.0001707201889020071, "loss": 0.8752, "step": 130 },
    { "epoch": 0.15375586854460094, "grad_norm": 0.38340267539024353, "learning_rate": 0.00017048406139315232, "loss": 0.8847, "step": 131 },
    { "epoch": 0.15492957746478872, "grad_norm": 0.3654419779777527, "learning_rate": 0.00017024793388429753, "loss": 0.8448, "step": 132 },
    { "epoch": 0.15610328638497653, "grad_norm": 0.3601568341255188, "learning_rate": 0.00017001180637544273, "loss": 0.8981, "step": 133 },
    { "epoch": 0.1572769953051643, "grad_norm": 0.40733832120895386, "learning_rate": 0.00016977567886658796, "loss": 0.9135, "step": 134 },
    { "epoch": 0.15845070422535212, "grad_norm": 0.34627673029899597, "learning_rate": 0.0001695395513577332, "loss": 0.9164, "step": 135 },
    { "epoch": 0.1596244131455399, "grad_norm": 0.3865872621536255, "learning_rate": 0.0001693034238488784, "loss": 0.9222, "step": 136 },
    { "epoch": 0.1607981220657277, "grad_norm": 0.4011456072330475, "learning_rate": 0.00016906729634002363, "loss": 0.8843, "step": 137 },
    { "epoch": 0.1619718309859155, "grad_norm": 0.32259878516197205, "learning_rate": 0.00016883116883116884, "loss": 0.8427, "step": 138 },
    { "epoch": 0.16314553990610328, "grad_norm": 0.3807618319988251, "learning_rate": 0.00016859504132231404, "loss": 0.8684, "step": 139 },
    { "epoch": 0.1643192488262911, "grad_norm": 0.3658106327056885, "learning_rate": 0.00016835891381345927, "loss": 0.9024, "step": 140 },
    { "epoch": 0.16549295774647887, "grad_norm": 0.3638787865638733, "learning_rate": 0.0001681227863046045, "loss": 0.8582, "step": 141 },
    { "epoch": 0.16666666666666666, "grad_norm": 0.3839091360569, "learning_rate": 0.0001678866587957497, "loss": 0.8543, "step": 142 },
    { "epoch": 0.16784037558685447, "grad_norm": 0.33579927682876587, "learning_rate": 0.00016765053128689494, "loss": 0.8765, "step": 143 },
    { "epoch": 0.16901408450704225, "grad_norm": 0.35091203451156616, "learning_rate": 0.00016741440377804014, "loss": 0.8504, "step": 144 },
    { "epoch": 0.17018779342723006, "grad_norm": 0.35823047161102295, "learning_rate": 0.00016717827626918538, "loss": 0.8534, "step": 145 },
    { "epoch": 0.17136150234741784, "grad_norm": 0.37154486775398254, "learning_rate": 0.00016694214876033058, "loss": 0.851, "step": 146 },
    { "epoch": 0.17253521126760563, "grad_norm": 0.33140066266059875, "learning_rate": 0.0001667060212514758, "loss": 0.8136, "step": 147 },
    { "epoch": 0.17370892018779344, "grad_norm": 0.37408292293548584, "learning_rate": 0.00016646989374262104, "loss": 0.8933, "step": 148 },
    { "epoch": 0.17488262910798122, "grad_norm": 0.36203357577323914, "learning_rate": 0.00016623376623376625, "loss": 0.8747, "step": 149 },
    { "epoch": 0.176056338028169, "grad_norm": 0.35033532977104187, "learning_rate": 0.00016599763872491145, "loss": 0.8273, "step": 150 },
    { "epoch": 0.1772300469483568, "grad_norm": 0.345048189163208, "learning_rate": 0.00016576151121605668, "loss": 0.8698, "step": 151 },
    { "epoch": 0.1784037558685446, "grad_norm": 0.3592989146709442, "learning_rate": 0.0001655253837072019, "loss": 0.8483, "step": 152 },
    { "epoch": 0.1795774647887324, "grad_norm": 0.3685864806175232, "learning_rate": 0.00016528925619834712, "loss": 0.915, "step": 153 },
    { "epoch": 0.1807511737089202, "grad_norm": 0.3427909314632416, "learning_rate": 0.00016505312868949235, "loss": 0.8321, "step": 154 },
    { "epoch": 0.18192488262910797, "grad_norm": 0.34697192907333374, "learning_rate": 0.00016481700118063756, "loss": 0.8801, "step": 155 },
    { "epoch": 0.18309859154929578, "grad_norm": 0.3387276530265808, "learning_rate": 0.00016458087367178276, "loss": 0.8237, "step": 156 },
    { "epoch": 0.18427230046948356, "grad_norm": 0.3547775447368622, "learning_rate": 0.000164344746162928, "loss": 0.8645, "step": 157 },
    { "epoch": 0.18544600938967137, "grad_norm": 0.3342725932598114, "learning_rate": 0.00016410861865407322, "loss": 0.82, "step": 158 },
    { "epoch": 0.18661971830985916, "grad_norm": 0.4317960739135742, "learning_rate": 0.00016387249114521843, "loss": 0.8614, "step": 159 },
    { "epoch": 0.18779342723004694, "grad_norm": 0.35031062364578247, "learning_rate": 0.00016363636363636366, "loss": 0.8193, "step": 160 },
    { "epoch": 0.18896713615023475, "grad_norm": 0.3616986572742462, "learning_rate": 0.00016340023612750886, "loss": 0.8571, "step": 161 },
    { "epoch": 0.19014084507042253, "grad_norm": 0.36284518241882324, "learning_rate": 0.00016316410861865407, "loss": 0.8555, "step": 162 },
    { "epoch": 0.19131455399061034, "grad_norm": 0.42962291836738586, "learning_rate": 0.0001629279811097993, "loss": 0.8574, "step": 163 },
    { "epoch": 0.19248826291079812, "grad_norm": 0.330268532037735, "learning_rate": 0.00016269185360094453, "loss": 0.8952, "step": 164 },
    { "epoch": 0.1936619718309859, "grad_norm": 0.33917295932769775, "learning_rate": 0.00016245572609208974, "loss": 0.8588, "step": 165 },
    { "epoch": 0.19483568075117372, "grad_norm": 0.3963412046432495, "learning_rate": 0.00016221959858323494, "loss": 0.8451, "step": 166 },
    { "epoch": 0.1960093896713615, "grad_norm": 0.33864182233810425, "learning_rate": 0.00016198347107438017, "loss": 0.8734, "step": 167 },
    { "epoch": 0.19718309859154928, "grad_norm": 0.3751653730869293, "learning_rate": 0.00016174734356552538, "loss": 0.8786, "step": 168 },
    { "epoch": 0.1983568075117371, "grad_norm": 0.4138842821121216, "learning_rate": 0.0001615112160566706, "loss": 0.8608, "step": 169 },
    { "epoch": 0.19953051643192488, "grad_norm": 0.3747748136520386, "learning_rate": 0.00016127508854781584, "loss": 0.8901, "step": 170 },
    { "epoch": 0.2007042253521127, "grad_norm": 0.3302014172077179, "learning_rate": 0.00016103896103896104, "loss": 0.8538, "step": 171 },
    { "epoch": 0.20187793427230047, "grad_norm": 0.36144372820854187, "learning_rate": 0.00016080283353010625, "loss": 0.8634, "step": 172 },
    { "epoch": 0.20305164319248825, "grad_norm": 0.3579455018043518, "learning_rate": 0.00016056670602125148, "loss": 0.8536, "step": 173 },
    { "epoch": 0.20422535211267606, "grad_norm": 0.3475671410560608, "learning_rate": 0.0001603305785123967, "loss": 0.8304, "step": 174 },
    { "epoch": 0.20539906103286384, "grad_norm": 0.34114810824394226, "learning_rate": 0.00016009445100354192, "loss": 0.8276, "step": 175 },
    { "epoch": 0.20657276995305165, "grad_norm": 0.32198190689086914, "learning_rate": 0.00015985832349468715, "loss": 0.815, "step": 176 },
    { "epoch": 0.20774647887323944, "grad_norm": 0.4003874361515045, "learning_rate": 0.00015962219598583238, "loss": 0.8523, "step": 177 },
    { "epoch": 0.20892018779342722, "grad_norm": 0.32290229201316833, "learning_rate": 0.00015938606847697756, "loss": 0.8465, "step": 178 },
    { "epoch": 0.21009389671361503, "grad_norm": 0.35729506611824036, "learning_rate": 0.0001591499409681228, "loss": 0.8437, "step": 179 },
    { "epoch": 0.2112676056338028, "grad_norm": 0.33743324875831604, "learning_rate": 0.00015891381345926802, "loss": 0.8351, "step": 180 },
    { "epoch": 0.21244131455399062, "grad_norm": 0.34673774242401123, "learning_rate": 0.00015867768595041322, "loss": 0.8146, "step": 181 },
    { "epoch": 0.2136150234741784, "grad_norm": 0.37883323431015015, "learning_rate": 0.00015844155844155845, "loss": 0.8889, "step": 182 },
    { "epoch": 0.2147887323943662, "grad_norm": 0.34172534942626953, "learning_rate": 0.00015820543093270366, "loss": 0.8479, "step": 183 },
    { "epoch": 0.215962441314554, "grad_norm": 0.39948219060897827, "learning_rate": 0.0001579693034238489, "loss": 0.8383, "step": 184 },
    { "epoch": 0.21713615023474178, "grad_norm": 0.33746814727783203, "learning_rate": 0.0001577331759149941, "loss": 0.8713, "step": 185 },
    { "epoch": 0.21830985915492956, "grad_norm": 0.34141069650650024, "learning_rate": 0.00015749704840613933, "loss": 0.8303, "step": 186 },
    { "epoch": 0.21948356807511737, "grad_norm": 0.35994264483451843, "learning_rate": 0.00015726092089728456, "loss": 0.7919, "step": 187 },
    { "epoch": 0.22065727699530516, "grad_norm": 0.34234684705734253, "learning_rate": 0.00015702479338842976, "loss": 0.8225, "step": 188 },
    { "epoch": 0.22183098591549297, "grad_norm": 0.3601793050765991, "learning_rate": 0.00015678866587957497, "loss": 0.8395, "step": 189 },
    { "epoch": 0.22300469483568075, "grad_norm": 0.3154338002204895, "learning_rate": 0.0001565525383707202, "loss": 0.7735, "step": 190 },
    { "epoch": 0.22417840375586853, "grad_norm": 0.3758296072483063, "learning_rate": 0.0001563164108618654, "loss": 0.8241, "step": 191 },
    { "epoch": 0.22535211267605634, "grad_norm": 0.3732200264930725, "learning_rate": 0.00015608028335301063, "loss": 0.8116, "step": 192 },
    { "epoch": 0.22652582159624413, "grad_norm": 0.3601556718349457, "learning_rate": 0.00015584415584415587, "loss": 0.8242, "step": 193 },
    { "epoch": 0.22769953051643194, "grad_norm": 0.360442191362381, "learning_rate": 0.00015560802833530107, "loss": 0.832, "step": 194 },
    { "epoch": 0.22887323943661972, "grad_norm": 0.35598254203796387, "learning_rate": 0.00015537190082644627, "loss": 0.8938, "step": 195 },
    { "epoch": 0.2300469483568075, "grad_norm": 0.3962613046169281, "learning_rate": 0.0001551357733175915, "loss": 0.8409, "step": 196 },
    { "epoch": 0.2312206572769953, "grad_norm": 0.3521510064601898, "learning_rate": 0.00015489964580873674, "loss": 0.8298, "step": 197 },
    { "epoch": 0.2323943661971831, "grad_norm": 0.34407946467399597, "learning_rate": 0.00015466351829988194, "loss": 0.7921, "step": 198 },
    { "epoch": 0.2335680751173709, "grad_norm": 0.3572155237197876, "learning_rate": 0.00015442739079102717, "loss": 0.8997, "step": 199 },
    { "epoch": 0.2347417840375587, "grad_norm": 0.345745712518692, "learning_rate": 0.00015419126328217238, "loss": 0.8563, "step": 200 },
    { "epoch": 0.23591549295774647, "grad_norm": 0.3741077780723572, "learning_rate": 0.00015395513577331758, "loss": 0.8334, "step": 201 },
    { "epoch": 0.23708920187793428, "grad_norm": 0.36866459250450134, "learning_rate": 0.00015371900826446281, "loss": 0.8398, "step": 202 },
    { "epoch": 0.23826291079812206, "grad_norm": 0.3834739625453949, "learning_rate": 0.00015348288075560805, "loss": 0.8181, "step": 203 },
    { "epoch": 0.23943661971830985, "grad_norm": 0.373045951128006, "learning_rate": 0.00015324675324675325, "loss": 0.8044, "step": 204 },
    { "epoch": 0.24061032863849766, "grad_norm": 0.3418562412261963, "learning_rate": 0.00015301062573789848, "loss": 0.8454, "step": 205 },
    { "epoch": 0.24178403755868544, "grad_norm": 0.36289098858833313, "learning_rate": 0.00015277449822904369, "loss": 0.8478, "step": 206 },
    { "epoch": 0.24295774647887325, "grad_norm": 0.38806968927383423, "learning_rate": 0.00015253837072018892, "loss": 0.804, "step": 207 },
    { "epoch": 0.24413145539906103, "grad_norm": 0.34217599034309387, "learning_rate": 0.00015230224321133412, "loss": 0.8391, "step": 208 },
    { "epoch": 0.24530516431924881, "grad_norm": 0.3738957643508911, "learning_rate": 0.00015206611570247935, "loss": 0.9026, "step": 209 },
    { "epoch": 0.24647887323943662, "grad_norm": 0.3481609523296356, "learning_rate": 0.00015182998819362458, "loss": 0.8674, "step": 210 },
    { "epoch": 0.2476525821596244, "grad_norm": 0.38967254757881165, "learning_rate": 0.00015159386068476976, "loss": 0.8796, "step": 211 },
    { "epoch": 0.24882629107981222, "grad_norm": 0.34841835498809814, "learning_rate": 0.000151357733175915, "loss": 0.7913, "step": 212 },
    { "epoch": 0.25, "grad_norm": 0.33826395869255066, "learning_rate": 0.00015112160566706023, "loss": 0.8539, "step": 213 },
    { "epoch": 0.2511737089201878, "grad_norm": 0.35131266713142395, "learning_rate": 0.00015088547815820543, "loss": 0.8072, "step": 214 },
    { "epoch": 0.25234741784037557, "grad_norm": 0.3298250734806061, "learning_rate": 0.00015064935064935066, "loss": 0.7688, "step": 215 },
    { "epoch": 0.2535211267605634, "grad_norm": 0.33808133006095886, "learning_rate": 0.0001504132231404959, "loss": 0.7609, "step": 216 },
    { "epoch": 0.2546948356807512, "grad_norm": 0.37146687507629395, "learning_rate": 0.0001501770956316411, "loss": 0.843, "step": 217 },
    { "epoch": 0.25586854460093894, "grad_norm": 0.33817118406295776, "learning_rate": 0.0001499409681227863, "loss": 0.7828, "step": 218 },
    { "epoch": 0.25704225352112675, "grad_norm": 0.35203686356544495, "learning_rate": 0.00014970484061393153, "loss": 0.8236, "step": 219 },
    { "epoch": 0.25821596244131456, "grad_norm": 0.34176716208457947, "learning_rate": 0.00014946871310507676, "loss": 0.8191, "step": 220 },
    { "epoch": 0.25938967136150237, "grad_norm": 0.34649035334587097, "learning_rate": 0.00014923258559622197, "loss": 0.8284, "step": 221 },
    { "epoch": 0.2605633802816901, "grad_norm": 0.35891467332839966, "learning_rate": 0.00014899645808736717, "loss": 0.8149, "step": 222 },
    { "epoch": 0.26173708920187794, "grad_norm": 0.3408451974391937, "learning_rate": 0.0001487603305785124, "loss": 0.8049, "step": 223 },
    { "epoch": 0.26291079812206575, "grad_norm": 0.36554664373397827, "learning_rate": 0.0001485242030696576, "loss": 0.8478, "step": 224 },
    { "epoch": 0.2640845070422535, "grad_norm": 0.3355228304862976, "learning_rate": 0.00014828807556080284, "loss": 0.815, "step": 225 },
    { "epoch": 0.2652582159624413, "grad_norm": 0.3500598669052124, "learning_rate": 0.00014805194805194807, "loss": 0.8571, "step": 226 },
    { "epoch": 0.2664319248826291, "grad_norm": 0.3362652659416199, "learning_rate": 0.00014781582054309328, "loss": 0.8363, "step": 227 },
    { "epoch": 0.2676056338028169, "grad_norm": 0.34258243441581726, "learning_rate": 0.00014757969303423848, "loss": 0.7648, "step": 228 },
    { "epoch": 0.2687793427230047, "grad_norm": 0.34023317694664, "learning_rate": 0.0001473435655253837, "loss": 0.8373, "step": 229 },
    { "epoch": 0.2699530516431925, "grad_norm": 0.35829535126686096, "learning_rate": 0.00014710743801652894, "loss": 0.8255, "step": 230 },
    { "epoch": 0.2711267605633803, "grad_norm": 0.3499360978603363, "learning_rate": 0.00014687131050767415, "loss": 0.8514, "step": 231 },
    { "epoch": 0.27230046948356806, "grad_norm": 0.3703480362892151, "learning_rate": 0.00014663518299881938, "loss": 0.8615, "step": 232 },
    { "epoch": 0.2734741784037559, "grad_norm": 0.3460928499698639, "learning_rate": 0.0001463990554899646, "loss": 0.7891, "step": 233 },
    { "epoch": 0.2746478873239437, "grad_norm": 0.34184372425079346, "learning_rate": 0.0001461629279811098, "loss": 0.8168, "step": 234 },
    { "epoch": 0.27582159624413144, "grad_norm": 0.34520068764686584, "learning_rate": 0.00014592680047225502, "loss": 0.8271, "step": 235 },
    { "epoch": 0.27699530516431925, "grad_norm": 0.3415423631668091, "learning_rate": 0.00014569067296340025, "loss": 0.783, "step": 236 },
    { "epoch": 0.27816901408450706, "grad_norm": 0.34584441781044006, "learning_rate": 0.00014545454545454546, "loss": 0.8488, "step": 237 },
    { "epoch": 0.2793427230046948, "grad_norm": 0.33898866176605225, "learning_rate": 0.0001452184179456907, "loss": 0.8786, "step": 238 },
    { "epoch": 0.2805164319248826, "grad_norm": 0.3591814339160919, "learning_rate": 0.0001449822904368359, "loss": 0.8081, "step": 239 },
    { "epoch": 0.28169014084507044, "grad_norm": 0.34305432438850403, "learning_rate": 0.0001447461629279811, "loss": 0.7911, "step": 240 },
    { "epoch": 0.2828638497652582, "grad_norm": 0.35866865515708923, "learning_rate": 0.00014451003541912633, "loss": 0.8393, "step": 241 },
    { "epoch": 0.284037558685446, "grad_norm": 0.3422331213951111, "learning_rate": 0.00014427390791027156, "loss": 0.848, "step": 242 },
    { "epoch": 0.2852112676056338, "grad_norm": 0.33504337072372437, "learning_rate": 0.00014403778040141676, "loss": 0.7782, "step": 243 },
    { "epoch": 0.2863849765258216, "grad_norm": 0.3509252667427063, "learning_rate": 0.000143801652892562, "loss": 0.8535, "step": 244 },
    { "epoch": 0.2875586854460094, "grad_norm": 0.3254059851169586, "learning_rate": 0.0001435655253837072, "loss": 0.7642, "step": 245 },
    { "epoch": 0.2887323943661972, "grad_norm": 0.33594879508018494, "learning_rate": 0.00014332939787485243, "loss": 0.814, "step": 246 },
    { "epoch": 0.289906103286385, "grad_norm": 0.3620656132698059, "learning_rate": 0.00014309327036599764, "loss": 0.8248, "step": 247 },
    { "epoch": 0.29107981220657275, "grad_norm": 0.3325202167034149, "learning_rate": 0.00014285714285714287, "loss": 0.7408, "step": 248 },
    { "epoch": 0.29225352112676056, "grad_norm": 0.33905264735221863, "learning_rate": 0.0001426210153482881, "loss": 0.8446, "step": 249 },
    { "epoch": 0.2934272300469484, "grad_norm": 0.3577309548854828, "learning_rate": 0.0001423848878394333, "loss": 0.784, "step": 250 },
    { "epoch": 0.29460093896713613, "grad_norm": 0.3840247392654419, "learning_rate": 0.0001421487603305785, "loss": 0.8068, "step": 251 },
    { "epoch": 0.29577464788732394, "grad_norm": 0.3539847433567047, "learning_rate": 0.00014191263282172374, "loss": 0.8232, "step": 252 },
    { "epoch": 0.29694835680751175, "grad_norm": 0.33225932717323303, "learning_rate": 0.00014167650531286894, "loss": 0.7946, "step": 253 },
    { "epoch": 0.2981220657276995, "grad_norm": 0.3429291546344757, "learning_rate": 0.00014144037780401418, "loss": 0.816, "step": 254 },
    { "epoch": 0.2992957746478873, "grad_norm": 0.3584197163581848, "learning_rate": 0.0001412042502951594, "loss": 0.8351, "step": 255 },
    { "epoch": 0.3004694835680751, "grad_norm": 0.35585007071495056, "learning_rate": 0.0001409681227863046, "loss": 0.8255, "step": 256 },
    { "epoch": 0.30164319248826293, "grad_norm": 0.3510012924671173, "learning_rate": 0.00014073199527744982, "loss": 0.7889, "step": 257 },
    { "epoch": 0.3028169014084507, "grad_norm": 0.36646419763565063, "learning_rate": 0.00014049586776859505, "loss": 0.8161, "step": 258 },
    { "epoch": 0.3039906103286385, "grad_norm": 0.35207659006118774, "learning_rate": 0.00014025974025974028, "loss": 0.8151, "step": 259 },
    { "epoch": 0.3051643192488263, "grad_norm": 0.33348143100738525, "learning_rate": 0.00014002361275088548, "loss": 0.8108, "step": 260 },
    { "epoch": 0.30633802816901406, "grad_norm": 0.3474767506122589, "learning_rate": 0.00013978748524203072, "loss": 0.8105, "step": 261 },
    { "epoch": 0.3075117370892019, "grad_norm": 0.37046462297439575, "learning_rate": 0.00013955135773317592, "loss": 0.867, "step": 262 },
    { "epoch": 0.3086854460093897, "grad_norm": 0.3426377475261688, "learning_rate": 0.00013931523022432112, "loss": 0.8281, "step": 263 },
    { "epoch": 0.30985915492957744, "grad_norm": 0.3340952694416046, "learning_rate": 0.00013907910271546636, "loss": 0.7805, "step": 264 },
    { "epoch": 0.31103286384976525, "grad_norm": 0.3546634316444397, "learning_rate": 0.0001388429752066116, "loss": 0.824, "step": 265 },
    { "epoch": 0.31220657276995306, "grad_norm": 0.3211507499217987, "learning_rate": 0.0001386068476977568, "loss": 0.7572, "step": 266 },
    { "epoch": 0.31338028169014087, "grad_norm": 0.3440265357494354, "learning_rate": 0.000138370720188902, "loss": 0.8247, "step": 267 },
    { "epoch": 0.3145539906103286, "grad_norm": 0.34174132347106934, "learning_rate": 0.00013813459268004723, "loss": 0.7939, "step": 268 },
    { "epoch": 0.31572769953051644, "grad_norm": 0.3415057361125946, "learning_rate": 0.00013789846517119246, "loss": 0.8184, "step": 269 },
    { "epoch": 0.31690140845070425, "grad_norm": 0.3313206732273102, "learning_rate": 0.00013766233766233766, "loss": 0.7936, "step": 270 },
    { "epoch": 0.318075117370892, "grad_norm": 0.35693395137786865, "learning_rate": 0.0001374262101534829, "loss": 0.7738, "step": 271 },
    { "epoch": 0.3192488262910798, "grad_norm": 0.3530910313129425, "learning_rate": 0.00013719008264462813, "loss": 0.7901, "step": 272 },
    { "epoch": 0.3204225352112676, "grad_norm": 0.34867924451828003, "learning_rate": 0.0001369539551357733, "loss": 0.8281, "step": 273 },
    { "epoch": 0.3215962441314554, "grad_norm": 0.34141889214515686, "learning_rate": 0.00013671782762691854, "loss": 0.7987, "step": 274 },
    { "epoch": 0.3227699530516432, "grad_norm": 0.3511849045753479, "learning_rate": 0.00013648170011806377, "loss": 0.8306, "step": 275 },
    { "epoch": 0.323943661971831, "grad_norm": 0.343523770570755, "learning_rate": 0.00013624557260920897, "loss": 0.7813, "step": 276 },
    { "epoch": 0.32511737089201875, "grad_norm": 0.3539726138114929, "learning_rate": 0.0001360094451003542, "loss": 0.8258, "step": 277 },
    { "epoch": 0.32629107981220656, "grad_norm": 0.35628989338874817, "learning_rate": 0.00013577331759149943, "loss": 0.829, "step": 278 },
    { "epoch": 0.3274647887323944, "grad_norm": 0.3531114459037781, "learning_rate": 0.00013553719008264464, "loss": 0.8475, "step": 279 },
    { "epoch": 0.3286384976525822, "grad_norm": 0.35344576835632324, "learning_rate": 0.00013530106257378984, "loss": 0.8343, "step": 280 },
    { "epoch": 0.32981220657276994, "grad_norm": 0.37604016065597534, "learning_rate": 0.00013506493506493507, "loss": 0.7598, "step": 281 },
    { "epoch": 0.33098591549295775, "grad_norm": 0.35646241903305054, "learning_rate": 0.0001348288075560803, "loss": 0.83, "step": 282 },
    { "epoch": 0.33215962441314556, "grad_norm": 0.36084675788879395, "learning_rate": 0.0001345926800472255, "loss": 0.7465, "step": 283 },
    { "epoch": 0.3333333333333333, "grad_norm": 0.3514406085014343, "learning_rate": 0.00013435655253837071, "loss": 0.7979, "step": 284 },
    { "epoch": 0.3345070422535211, "grad_norm": 0.3554603159427643, "learning_rate": 0.00013412042502951595, "loss": 0.8487, "step": 285 },
    { "epoch": 0.33568075117370894, "grad_norm": 0.3360341787338257, "learning_rate": 0.00013388429752066115, "loss": 0.7787, "step": 286 },
    { "epoch": 0.3368544600938967, "grad_norm": 0.35026323795318604, "learning_rate": 0.00013364817001180638, "loss": 0.7845, "step": 287 },
    { "epoch": 0.3380281690140845, "grad_norm": 0.3419228494167328, "learning_rate": 0.00013341204250295161, "loss": 0.7971, "step": 288 },
    { "epoch": 0.3392018779342723, "grad_norm": 0.3314400315284729, "learning_rate": 0.00013317591499409682, "loss": 0.7899, "step": 289 },
    { "epoch": 0.3403755868544601, "grad_norm": 0.3434331715106964, "learning_rate": 0.00013293978748524202, "loss": 0.827, "step": 290 },
    { "epoch": 0.3415492957746479, "grad_norm": 0.34718382358551025, "learning_rate": 0.00013270365997638725, "loss": 0.7835, "step": 291 },
    { "epoch": 0.3427230046948357, "grad_norm": 0.3585168421268463, "learning_rate": 0.00013246753246753249, "loss": 0.8728, "step": 292 },
    { "epoch": 0.3438967136150235, "grad_norm": 0.3508673906326294, "learning_rate": 0.0001322314049586777, "loss": 0.836, "step": 293 },
    { "epoch": 0.34507042253521125, "grad_norm": 0.40241560339927673, "learning_rate": 0.00013199527744982292, "loss": 0.8043, "step": 294 },
    { "epoch": 0.34624413145539906, "grad_norm": 0.33775267004966736, "learning_rate": 0.00013175914994096813, "loss": 0.8047, "step": 295 },
    { "epoch": 0.3474178403755869, "grad_norm": 0.3423898220062256, "learning_rate": 0.00013152302243211333, "loss": 0.7894, "step": 296 },
    { "epoch": 0.3485915492957746, "grad_norm": 0.3472992479801178, "learning_rate": 0.00013128689492325856, "loss": 0.8198, "step": 297 },
    { "epoch": 0.34976525821596244, "grad_norm": 0.3425481915473938, "learning_rate": 0.0001310507674144038, "loss": 0.8178, "step": 298 },
    { "epoch": 0.35093896713615025, "grad_norm": 0.3459112048149109, "learning_rate": 0.000130814639905549, "loss": 0.7749, "step": 299 },
    { "epoch": 0.352112676056338, "grad_norm": 0.353595495223999, "learning_rate": 0.00013057851239669423, "loss": 0.7886, "step": 300 },
    { "epoch": 0.3532863849765258, "grad_norm": 0.35495465993881226, "learning_rate": 0.00013034238488783943, "loss": 0.771, "step": 301 },
    { "epoch": 0.3544600938967136, "grad_norm": 0.34812483191490173, "learning_rate": 0.00013010625737898467, "loss": 0.8335, "step": 302 },
    { "epoch": 0.35563380281690143, "grad_norm": 0.3655085861682892, "learning_rate": 0.00012987012987012987, "loss": 0.8117, "step": 303 },
    { "epoch": 0.3568075117370892, "grad_norm": 0.35925915837287903, "learning_rate": 0.0001296340023612751, "loss": 0.8147, "step": 304 },
    { "epoch": 0.357981220657277, "grad_norm": 0.3293222486972809, "learning_rate": 0.00012939787485242033, "loss": 0.7602, "step": 305 },
    { "epoch": 0.3591549295774648, "grad_norm": 0.3486446738243103, "learning_rate": 0.00012916174734356554, "loss": 0.7857, "step": 306 },
    { "epoch": 0.36032863849765256, "grad_norm": 0.382565975189209, "learning_rate": 0.00012892561983471074, "loss": 0.863, "step": 307 },
    { "epoch": 0.3615023474178404, "grad_norm": 0.32544344663619995, "learning_rate": 0.00012868949232585597, "loss": 0.781, "step": 308 },
    { "epoch": 0.3626760563380282, "grad_norm": 0.38700491189956665, "learning_rate": 0.00012845336481700118, "loss": 0.8102, "step": 309 },
    { "epoch": 0.36384976525821594, "grad_norm": 0.3503759503364563, "learning_rate": 0.0001282172373081464, "loss": 0.7699, "step": 310 },
    { "epoch": 0.36502347417840375, "grad_norm": 0.3323630094528198, "learning_rate": 0.00012798110979929164, "loss": 0.7511, "step": 311 },
    { "epoch": 0.36619718309859156, "grad_norm": 0.3668995797634125, "learning_rate": 0.00012774498229043685, "loss": 0.7374, "step": 312 },
    { "epoch": 0.3673708920187793, "grad_norm": 0.37373387813568115, "learning_rate": 0.00012750885478158205, "loss": 0.8077, "step": 313 },
    { "epoch": 0.3685446009389671, "grad_norm": 0.3601135015487671, "learning_rate": 0.00012727272727272728, "loss": 0.7991, "step": 314 },
    { "epoch": 0.36971830985915494, "grad_norm": 0.3527435064315796, "learning_rate": 0.00012703659976387249, "loss": 0.7971, "step": 315 },
    { "epoch": 0.37089201877934275, "grad_norm": 0.3584372401237488, "learning_rate": 0.00012680047225501772, "loss": 0.7513, "step": 316 },
    { "epoch": 0.3720657276995305, "grad_norm": 0.3517726957798004, "learning_rate": 0.00012656434474616295, "loss": 0.8206, "step": 317 },
    { "epoch": 0.3732394366197183, "grad_norm": 0.3655302822589874, "learning_rate": 0.00012632821723730815, "loss": 0.771, "step": 318 },
    { "epoch": 0.3744131455399061, "grad_norm": 0.3659893274307251, "learning_rate": 0.00012609208972845336, "loss": 0.8048, "step": 319 },
    { "epoch": 0.3755868544600939, "grad_norm": 0.36364591121673584, "learning_rate": 0.0001258559622195986, "loss": 0.7832, "step": 320 },
    { "epoch": 0.3767605633802817, "grad_norm": 0.37528395652770996, "learning_rate": 0.00012561983471074382, "loss": 0.7926, "step": 321 },
    { "epoch": 0.3779342723004695, "grad_norm": 0.37137654423713684, "learning_rate": 0.00012538370720188903, "loss": 0.8486, "step": 322 },
    { "epoch": 0.37910798122065725, "grad_norm": 0.3466728925704956, "learning_rate": 0.00012514757969303423, "loss": 0.7961, "step": 323 },
    { "epoch": 0.38028169014084506, "grad_norm": 0.38629114627838135, "learning_rate": 0.00012491145218417946, "loss": 0.8071, "step": 324 },
    { "epoch": 0.3814553990610329, "grad_norm": 0.34686383605003357, "learning_rate": 0.00012467532467532467, "loss": 0.7698, "step": 325 },
    { "epoch": 0.3826291079812207, "grad_norm": 0.36625292897224426, "learning_rate": 0.0001244391971664699, "loss": 0.8486, "step": 326 },
    { "epoch": 0.38380281690140844, "grad_norm": 0.38903650641441345, "learning_rate": 0.00012420306965761513, "loss": 0.8031, "step": 327 },
    { "epoch": 0.38497652582159625, "grad_norm": 0.3456287980079651, "learning_rate": 0.00012396694214876033, "loss": 0.7887, "step": 328 },
    { "epoch": 0.38615023474178406, "grad_norm": 0.36374613642692566, "learning_rate": 0.00012373081463990554, "loss": 0.7588, "step": 329 },
    { "epoch": 0.3873239436619718, "grad_norm": 0.360626220703125, "learning_rate": 0.00012349468713105077, "loss": 0.8239, "step": 330 },
    { "epoch": 0.3884976525821596, "grad_norm": 0.40213796496391296, "learning_rate": 0.000123258559622196, "loss": 0.8029, "step": 331 },
    { "epoch": 0.38967136150234744, "grad_norm": 0.3273613750934601, "learning_rate": 0.0001230224321133412, "loss": 0.7567, "step": 332 },
    { "epoch": 0.3908450704225352, "grad_norm": 0.34953057765960693, "learning_rate": 0.00012278630460448644, "loss": 0.7512, "step": 333 },
    { "epoch": 0.392018779342723, "grad_norm": 0.34772762656211853, "learning_rate": 0.00012255017709563167, "loss": 0.7551, "step": 334 },
    { "epoch": 0.3931924882629108, "grad_norm": 0.34170207381248474, "learning_rate": 0.00012231404958677685, "loss": 0.7884, "step": 335 },
    { "epoch": 0.39436619718309857, "grad_norm": 0.3696103096008301, "learning_rate": 0.00012207792207792208, "loss": 0.8658, "step": 336 },
    { "epoch": 0.3955399061032864, "grad_norm": 0.3513827621936798, "learning_rate": 0.00012184179456906731, "loss": 0.8199, "step": 337 },
    { "epoch": 0.3967136150234742, "grad_norm": 0.3454856872558594, "learning_rate": 0.00012160566706021253, "loss": 0.7627, "step": 338 },
    { "epoch": 0.397887323943662, "grad_norm": 0.3246639370918274, "learning_rate": 0.00012136953955135774, "loss": 0.7454, "step": 339 },
    { "epoch": 0.39906103286384975, "grad_norm": 0.33567938208580017, "learning_rate": 0.00012113341204250295, "loss": 0.7611, "step": 340 },
    { "epoch": 0.40023474178403756, "grad_norm": 0.33728334307670593, "learning_rate": 0.00012089728453364817, "loss": 0.7575, "step": 341 },
    { "epoch": 0.4014084507042254, "grad_norm": 0.35161352157592773, "learning_rate": 0.0001206611570247934, "loss": 0.8117, "step": 342 },
    { "epoch": 0.4025821596244131, "grad_norm": 0.3425585925579071, "learning_rate": 0.00012042502951593862, "loss": 0.8019, "step": 343 },
    { "epoch": 0.40375586854460094, "grad_norm": 0.3406507968902588, "learning_rate": 0.00012018890200708383, "loss": 0.8235, "step": 344 },
    { "epoch": 0.40492957746478875, "grad_norm": 0.37840309739112854, "learning_rate": 0.00011995277449822907, "loss": 0.7866, "step": 345 },
    { "epoch": 0.4061032863849765, "grad_norm": 0.35816213488578796, "learning_rate": 0.00011971664698937426, "loss": 0.8425, "step": 346 },
    { "epoch": 0.4072769953051643, "grad_norm": 0.3441546559333801, "learning_rate": 0.00011948051948051949, "loss": 0.8094, "step": 347 },
    { "epoch": 0.4084507042253521, "grad_norm": 0.34275054931640625, "learning_rate": 0.0001192443919716647, "loss": 0.7244, "step": 348 },
    { "epoch": 0.4096244131455399, "grad_norm": 0.33207401633262634, "learning_rate": 0.00011900826446280992, "loss": 0.8108, "step": 349 },
    { "epoch": 0.4107981220657277, "grad_norm": 0.3412252962589264, "learning_rate": 0.00011877213695395516, "loss": 0.7818, "step": 350 },
    { "epoch": 0.4119718309859155, "grad_norm": 0.36701643466949463, "learning_rate": 0.00011853600944510035, "loss": 0.8293, "step": 351 },
    { "epoch": 0.4131455399061033, "grad_norm": 0.34462520480155945, "learning_rate": 0.00011829988193624558, "loss": 0.7603, "step": 352 },
    { "epoch": 0.41431924882629106, "grad_norm": 0.35232508182525635, "learning_rate": 0.0001180637544273908, "loss": 0.7616, "step": 353 },
    { "epoch": 0.4154929577464789, "grad_norm": 0.37428373098373413, "learning_rate": 0.00011782762691853601, "loss": 0.7919, "step": 354 },
    { "epoch": 0.4166666666666667, "grad_norm": 0.3429507911205292, "learning_rate": 0.00011759149940968123, "loss": 0.7859, "step": 355 },
    { "epoch": 0.41784037558685444, "grad_norm": 0.3584844470024109, "learning_rate": 0.00011735537190082646, "loss": 0.7934, "step": 356 },
    { "epoch": 0.41901408450704225, "grad_norm": 0.356391578912735, "learning_rate": 0.00011711924439197165, "loss": 0.8222, "step": 357 },
    { "epoch": 0.42018779342723006, "grad_norm": 0.3663417100906372, "learning_rate": 0.00011688311688311689, "loss": 0.7507, "step": 358 },
    { "epoch": 0.4213615023474178, "grad_norm": 0.3388553559780121, "learning_rate": 0.0001166469893742621, "loss": 0.8263, "step": 359 },
    { "epoch": 0.4225352112676056, "grad_norm": 0.34876593947410583, "learning_rate": 0.00011641086186540732, "loss": 0.7969, "step": 360 },
    { "epoch": 0.42370892018779344, "grad_norm": 0.3500271737575531, "learning_rate": 0.00011617473435655255, "loss": 0.7789, "step": 361 },
    { "epoch": 0.42488262910798125, "grad_norm": 0.3554798662662506, "learning_rate": 0.00011593860684769777, "loss": 0.7681, "step": 362 },
    { "epoch": 0.426056338028169, "grad_norm": 0.34559762477874756, "learning_rate": 0.00011570247933884298, "loss": 0.7676, "step": 363 },
    { "epoch": 0.4272300469483568, "grad_norm": 0.3520505726337433, "learning_rate": 0.0001154663518299882, "loss": 0.7494, "step": 364 },
    { "epoch": 0.4284037558685446, "grad_norm": 0.35454803705215454, "learning_rate": 0.00011523022432113341, "loss": 0.7516, "step": 365 },
    { "epoch": 0.4295774647887324, "grad_norm": 0.36526602506637573, "learning_rate": 0.00011499409681227864, "loss": 0.7789, "step": 366 },
    { "epoch": 0.4307511737089202, "grad_norm": 0.34084445238113403, "learning_rate": 0.00011475796930342386, "loss": 0.7446,
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.431924882629108, |
|
"grad_norm": 0.3405500054359436, |
|
"learning_rate": 0.00011452184179456907, |
|
"loss": 0.8217, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.43309859154929575, |
|
"grad_norm": 0.3523256182670593, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.7311, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.43427230046948356, |
|
"grad_norm": 0.3336530327796936, |
|
"learning_rate": 0.0001140495867768595, |
|
"loss": 0.7806, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4354460093896714, |
|
"grad_norm": 0.3268769383430481, |
|
"learning_rate": 0.00011381345926800473, |
|
"loss": 0.7945, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.43661971830985913, |
|
"grad_norm": 0.35258617997169495, |
|
"learning_rate": 0.00011357733175914995, |
|
"loss": 0.7468, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.43779342723004694, |
|
"grad_norm": 0.3546913266181946, |
|
"learning_rate": 0.00011334120425029517, |
|
"loss": 0.7921, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.43896713615023475, |
|
"grad_norm": 0.36266180872917175, |
|
"learning_rate": 0.00011310507674144037, |
|
"loss": 0.7623, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.44014084507042256, |
|
"grad_norm": 0.3355543613433838, |
|
"learning_rate": 0.00011286894923258559, |
|
"loss": 0.7436, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4413145539906103, |
|
"grad_norm": 0.33666127920150757, |
|
"learning_rate": 0.00011263282172373082, |
|
"loss": 0.7609, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.4424882629107981, |
|
"grad_norm": 0.3505670428276062, |
|
"learning_rate": 0.00011239669421487604, |
|
"loss": 0.7868, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.44366197183098594, |
|
"grad_norm": 0.3446255028247833, |
|
"learning_rate": 0.00011216056670602126, |
|
"loss": 0.765, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.4448356807511737, |
|
"grad_norm": 0.3761040270328522, |
|
"learning_rate": 0.00011192443919716649, |
|
"loss": 0.8104, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.4460093896713615, |
|
"grad_norm": 0.35692986845970154, |
|
"learning_rate": 0.00011168831168831168, |
|
"loss": 0.7896, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4471830985915493, |
|
"grad_norm": 0.34384050965309143, |
|
"learning_rate": 0.00011145218417945691, |
|
"loss": 0.7716, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.44835680751173707, |
|
"grad_norm": 0.3477395176887512, |
|
"learning_rate": 0.00011121605667060213, |
|
"loss": 0.8146, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.4495305164319249, |
|
"grad_norm": 0.35172998905181885, |
|
"learning_rate": 0.00011097992916174735, |
|
"loss": 0.7844, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.4507042253521127, |
|
"grad_norm": 0.33881857991218567, |
|
"learning_rate": 0.00011074380165289258, |
|
"loss": 0.7528, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.4518779342723005, |
|
"grad_norm": 0.3429534137248993, |
|
"learning_rate": 0.00011050767414403777, |
|
"loss": 0.7826, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.45305164319248825, |
|
"grad_norm": 0.34472665190696716, |
|
"learning_rate": 0.000110271546635183, |
|
"loss": 0.7153, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.45422535211267606, |
|
"grad_norm": 0.3572479486465454, |
|
"learning_rate": 0.00011003541912632822, |
|
"loss": 0.7811, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.45539906103286387, |
|
"grad_norm": 0.3531682789325714, |
|
"learning_rate": 0.00010979929161747344, |
|
"loss": 0.8016, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.4565727699530516, |
|
"grad_norm": 0.3845299780368805, |
|
"learning_rate": 0.00010956316410861867, |
|
"loss": 0.7817, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.45774647887323944, |
|
"grad_norm": 0.35217660665512085, |
|
"learning_rate": 0.00010932703659976389, |
|
"loss": 0.7495, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.45892018779342725, |
|
"grad_norm": 0.35103702545166016, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.7602, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.460093896713615, |
|
"grad_norm": 0.3511259853839874, |
|
"learning_rate": 0.00010885478158205431, |
|
"loss": 0.7923, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4612676056338028, |
|
"grad_norm": 0.33732983469963074, |
|
"learning_rate": 0.00010861865407319953, |
|
"loss": 0.7875, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.4624413145539906, |
|
"grad_norm": 0.35035955905914307, |
|
"learning_rate": 0.00010838252656434476, |
|
"loss": 0.7737, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.4636150234741784, |
|
"grad_norm": 0.3277076482772827, |
|
"learning_rate": 0.00010814639905548998, |
|
"loss": 0.7619, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.4647887323943662, |
|
"grad_norm": 0.34461456537246704, |
|
"learning_rate": 0.00010791027154663518, |
|
"loss": 0.7394, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.465962441314554, |
|
"grad_norm": 0.36000820994377136, |
|
"learning_rate": 0.0001076741440377804, |
|
"loss": 0.8004, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.4671361502347418, |
|
"grad_norm": 0.3291054666042328, |
|
"learning_rate": 0.00010743801652892562, |
|
"loss": 0.721, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.46830985915492956, |
|
"grad_norm": 0.37541574239730835, |
|
"learning_rate": 0.00010720188902007085, |
|
"loss": 0.7673, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.4694835680751174, |
|
"grad_norm": 0.33268067240715027, |
|
"learning_rate": 0.00010696576151121607, |
|
"loss": 0.7439, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4706572769953052, |
|
"grad_norm": 0.34383484721183777, |
|
"learning_rate": 0.00010672963400236129, |
|
"loss": 0.7453, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.47183098591549294, |
|
"grad_norm": 0.3543702960014343, |
|
"learning_rate": 0.00010649350649350649, |
|
"loss": 0.7544, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.47300469483568075, |
|
"grad_norm": 0.34553685784339905, |
|
"learning_rate": 0.00010625737898465171, |
|
"loss": 0.7656, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.47417840375586856, |
|
"grad_norm": 0.3437071144580841, |
|
"learning_rate": 0.00010602125147579694, |
|
"loss": 0.773, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.4753521126760563, |
|
"grad_norm": 0.34917253255844116, |
|
"learning_rate": 0.00010578512396694216, |
|
"loss": 0.7607, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.4765258215962441, |
|
"grad_norm": 0.33429262042045593, |
|
"learning_rate": 0.00010554899645808738, |
|
"loss": 0.768, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.47769953051643194, |
|
"grad_norm": 0.33842045068740845, |
|
"learning_rate": 0.00010531286894923261, |
|
"loss": 0.7665, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.4788732394366197, |
|
"grad_norm": 0.3419265151023865, |
|
"learning_rate": 0.0001050767414403778, |
|
"loss": 0.7717, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.4800469483568075, |
|
"grad_norm": 0.3458483815193176, |
|
"learning_rate": 0.00010484061393152303, |
|
"loss": 0.8031, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.4812206572769953, |
|
"grad_norm": 0.37077274918556213, |
|
"learning_rate": 0.00010460448642266825, |
|
"loss": 0.8009, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4823943661971831, |
|
"grad_norm": 0.35040315985679626, |
|
"learning_rate": 0.00010436835891381347, |
|
"loss": 0.7545, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.4835680751173709, |
|
"grad_norm": 0.3503456115722656, |
|
"learning_rate": 0.0001041322314049587, |
|
"loss": 0.8515, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4847417840375587, |
|
"grad_norm": 0.34627342224121094, |
|
"learning_rate": 0.00010389610389610389, |
|
"loss": 0.716, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.4859154929577465, |
|
"grad_norm": 0.3596992790699005, |
|
"learning_rate": 0.00010365997638724912, |
|
"loss": 0.7636, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.48708920187793425, |
|
"grad_norm": 0.3346829116344452, |
|
"learning_rate": 0.00010342384887839434, |
|
"loss": 0.7635, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.48826291079812206, |
|
"grad_norm": 0.37179237604141235, |
|
"learning_rate": 0.00010318772136953956, |
|
"loss": 0.7642, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4894366197183099, |
|
"grad_norm": 0.34897381067276, |
|
"learning_rate": 0.00010295159386068479, |
|
"loss": 0.7792, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.49061032863849763, |
|
"grad_norm": 0.3820830285549164, |
|
"learning_rate": 0.00010271546635183, |
|
"loss": 0.7722, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.49178403755868544, |
|
"grad_norm": 0.3688552677631378, |
|
"learning_rate": 0.00010247933884297521, |
|
"loss": 0.7927, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.49295774647887325, |
|
"grad_norm": 0.35100415349006653, |
|
"learning_rate": 0.00010224321133412043, |
|
"loss": 0.7848, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.49413145539906106, |
|
"grad_norm": 0.3596225082874298, |
|
"learning_rate": 0.00010200708382526565, |
|
"loss": 0.7383, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.4953051643192488, |
|
"grad_norm": 0.36203423142433167, |
|
"learning_rate": 0.00010177095631641088, |
|
"loss": 0.769, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4964788732394366, |
|
"grad_norm": 0.3776590824127197, |
|
"learning_rate": 0.0001015348288075561, |
|
"loss": 0.8007, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.49765258215962443, |
|
"grad_norm": 0.36009421944618225, |
|
"learning_rate": 0.0001012987012987013, |
|
"loss": 0.7557, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.4988262910798122, |
|
"grad_norm": 0.3442706763744354, |
|
"learning_rate": 0.00010106257378984652, |
|
"loss": 0.7488, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.3635407090187073, |
|
"learning_rate": 0.00010082644628099174, |
|
"loss": 0.7922, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.5011737089201878, |
|
"grad_norm": 0.3766370117664337, |
|
"learning_rate": 0.00010059031877213697, |
|
"loss": 0.7818, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.5023474178403756, |
|
"grad_norm": 0.34344202280044556, |
|
"learning_rate": 0.00010035419126328218, |
|
"loss": 0.8308, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5035211267605634, |
|
"grad_norm": 0.3495674133300781, |
|
"learning_rate": 0.0001001180637544274, |
|
"loss": 0.799, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5046948356807511, |
|
"grad_norm": 0.36545464396476746, |
|
"learning_rate": 9.988193624557262e-05, |
|
"loss": 0.7453, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.505868544600939, |
|
"grad_norm": 0.3482630252838135, |
|
"learning_rate": 9.964580873671782e-05, |
|
"loss": 0.7422, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.5070422535211268, |
|
"grad_norm": 0.3745418190956116, |
|
"learning_rate": 9.940968122786304e-05, |
|
"loss": 0.7333, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5082159624413145, |
|
"grad_norm": 0.3470025062561035, |
|
"learning_rate": 9.917355371900827e-05, |
|
"loss": 0.7907, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.5093896713615024, |
|
"grad_norm": 0.38251325488090515, |
|
"learning_rate": 9.893742621015348e-05, |
|
"loss": 0.7629, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5105633802816901, |
|
"grad_norm": 0.3829626739025116, |
|
"learning_rate": 9.870129870129871e-05, |
|
"loss": 0.7939, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.5117370892018779, |
|
"grad_norm": 0.35726287961006165, |
|
"learning_rate": 9.846517119244393e-05, |
|
"loss": 0.755, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5129107981220657, |
|
"grad_norm": 0.38168108463287354, |
|
"learning_rate": 9.822904368358913e-05, |
|
"loss": 0.7396, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.5140845070422535, |
|
"grad_norm": 0.35728660225868225, |
|
"learning_rate": 9.799291617473436e-05, |
|
"loss": 0.7568, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.5152582159624414, |
|
"grad_norm": 0.37819668650627136, |
|
"learning_rate": 9.775678866587958e-05, |
|
"loss": 0.8046, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.5164319248826291, |
|
"grad_norm": 0.4106784760951996, |
|
"learning_rate": 9.75206611570248e-05, |
|
"loss": 0.7116, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5176056338028169, |
|
"grad_norm": 0.3476578891277313, |
|
"learning_rate": 9.728453364817002e-05, |
|
"loss": 0.7824, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.5187793427230047, |
|
"grad_norm": 0.36705800890922546, |
|
"learning_rate": 9.704840613931524e-05, |
|
"loss": 0.7631, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5199530516431925, |
|
"grad_norm": 0.3880864977836609, |
|
"learning_rate": 9.681227863046045e-05, |
|
"loss": 0.7608, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.5211267605633803, |
|
"grad_norm": 0.3610959053039551, |
|
"learning_rate": 9.657615112160567e-05, |
|
"loss": 0.7909, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5223004694835681, |
|
"grad_norm": 0.33494657278060913, |
|
"learning_rate": 9.634002361275089e-05, |
|
"loss": 0.7108, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.5234741784037559, |
|
"grad_norm": 0.352055162191391, |
|
"learning_rate": 9.610389610389611e-05, |
|
"loss": 0.7177, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.5246478873239436, |
|
"grad_norm": 0.35466742515563965, |
|
"learning_rate": 9.586776859504133e-05, |
|
"loss": 0.7762, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.5258215962441315, |
|
"grad_norm": 0.34477657079696655, |
|
"learning_rate": 9.563164108618654e-05, |
|
"loss": 0.7583, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5269953051643192, |
|
"grad_norm": 0.37008315324783325, |
|
"learning_rate": 9.539551357733176e-05, |
|
"loss": 0.7954, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.528169014084507, |
|
"grad_norm": 0.34141793847084045, |
|
"learning_rate": 9.515938606847698e-05, |
|
"loss": 0.7444, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5293427230046949, |
|
"grad_norm": 0.3429400622844696, |
|
"learning_rate": 9.49232585596222e-05, |
|
"loss": 0.7499, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.5305164319248826, |
|
"grad_norm": 0.3666730225086212, |
|
"learning_rate": 9.468713105076742e-05, |
|
"loss": 0.7704, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.5316901408450704, |
|
"grad_norm": 0.34185874462127686, |
|
"learning_rate": 9.445100354191265e-05, |
|
"loss": 0.7446, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.5328638497652582, |
|
"grad_norm": 0.3718375861644745, |
|
"learning_rate": 9.421487603305785e-05, |
|
"loss": 0.7316, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.534037558685446, |
|
"grad_norm": 0.35064697265625, |
|
"learning_rate": 9.397874852420307e-05, |
|
"loss": 0.7651, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.5352112676056338, |
|
"grad_norm": 0.3724139630794525, |
|
"learning_rate": 9.37426210153483e-05, |
|
"loss": 0.7639, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.5363849765258216, |
|
"grad_norm": 0.3420800566673279, |
|
"learning_rate": 9.35064935064935e-05, |
|
"loss": 0.7578, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.5375586854460094, |
|
"grad_norm": 0.3437943160533905, |
|
"learning_rate": 9.327036599763874e-05, |
|
"loss": 0.7898, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.5387323943661971, |
|
"grad_norm": 0.3799413740634918, |
|
"learning_rate": 9.303423848878394e-05, |
|
"loss": 0.7216, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.539906103286385, |
|
"grad_norm": 0.35702013969421387, |
|
"learning_rate": 9.279811097992916e-05, |
|
"loss": 0.7509, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5410798122065728, |
|
"grad_norm": 0.36074140667915344, |
|
"learning_rate": 9.256198347107439e-05, |
|
"loss": 0.7448, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.5422535211267606, |
|
"grad_norm": 0.34211182594299316, |
|
"learning_rate": 9.23258559622196e-05, |
|
"loss": 0.7143, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.5434272300469484, |
|
"grad_norm": 0.3816893398761749, |
|
"learning_rate": 9.208972845336483e-05, |
|
"loss": 0.7178, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.5446009389671361, |
|
"grad_norm": 0.36033767461776733, |
|
"learning_rate": 9.185360094451005e-05, |
|
"loss": 0.7406, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.545774647887324, |
|
"grad_norm": 0.38050010800361633, |
|
"learning_rate": 9.161747343565525e-05, |
|
"loss": 0.7528, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.5469483568075117, |
|
"grad_norm": 0.3648395240306854, |
|
"learning_rate": 9.138134592680048e-05, |
|
"loss": 0.7802, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.5481220657276995, |
|
"grad_norm": 0.35185542702674866, |
|
"learning_rate": 9.11452184179457e-05, |
|
"loss": 0.7489, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.5492957746478874, |
|
"grad_norm": 0.3487717807292938, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.7742, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5504694835680751, |
|
"grad_norm": 0.36121654510498047, |
|
"learning_rate": 9.067296340023614e-05, |
|
"loss": 0.7974, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.5516431924882629, |
|
"grad_norm": 0.3470339775085449, |
|
"learning_rate": 9.043683589138135e-05, |
|
"loss": 0.723, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5528169014084507, |
|
"grad_norm": 0.33549764752388, |
|
"learning_rate": 9.020070838252657e-05, |
|
"loss": 0.7334, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.5539906103286385, |
|
"grad_norm": 0.36101868748664856, |
|
"learning_rate": 8.996458087367179e-05, |
|
"loss": 0.6817, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5551643192488263, |
|
"grad_norm": 0.36847153306007385, |
|
"learning_rate": 8.9728453364817e-05, |
|
"loss": 0.7942, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.5563380281690141, |
|
"grad_norm": 0.3564891815185547, |
|
"learning_rate": 8.949232585596222e-05, |
|
"loss": 0.7071, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.5575117370892019, |
|
"grad_norm": 0.36866652965545654, |
|
"learning_rate": 8.925619834710744e-05, |
|
"loss": 0.7685, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5586854460093896, |
|
"grad_norm": 0.370924711227417, |
|
"learning_rate": 8.902007083825266e-05, |
|
"loss": 0.7313, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5598591549295775, |
|
"grad_norm": 0.3611142039299011, |
|
"learning_rate": 8.878394332939788e-05, |
|
"loss": 0.7666, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.5610328638497653, |
|
"grad_norm": 0.3418121635913849, |
|
"learning_rate": 8.85478158205431e-05, |
|
"loss": 0.7194, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.562206572769953, |
|
"grad_norm": 0.3478650748729706, |
|
"learning_rate": 8.831168831168831e-05, |
|
"loss": 0.7145, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 0.3567008078098297, |
|
"learning_rate": 8.807556080283353e-05, |
|
"loss": 0.7591, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5645539906103286, |
|
"grad_norm": 0.3629607558250427, |
|
"learning_rate": 8.783943329397875e-05, |
|
"loss": 0.7856, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.5657276995305164, |
|
"grad_norm": 0.37257978320121765, |
|
"learning_rate": 8.760330578512397e-05, |
|
"loss": 0.709, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5669014084507042, |
|
"grad_norm": 0.3570626676082611, |
|
"learning_rate": 8.736717827626919e-05, |
|
"loss": 0.7639, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.568075117370892, |
|
"grad_norm": 0.34790506958961487, |
|
"learning_rate": 8.713105076741442e-05, |
|
"loss": 0.7375, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5692488262910798, |
|
"grad_norm": 0.3525756895542145, |
|
"learning_rate": 8.689492325855962e-05, |
|
"loss": 0.7274, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5704225352112676, |
|
"grad_norm": 0.3545394837856293, |
|
"learning_rate": 8.665879574970484e-05, |
|
"loss": 0.7531, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5715962441314554, |
|
"grad_norm": 0.35677066445350647, |
|
"learning_rate": 8.642266824085006e-05, |
|
"loss": 0.7682, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.5727699530516432, |
|
"grad_norm": 0.3439461290836334, |
|
"learning_rate": 8.618654073199528e-05, |
|
"loss": 0.7176, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.573943661971831, |
|
"grad_norm": 0.3622515797615051, |
|
"learning_rate": 8.595041322314051e-05, |
|
"loss": 0.7004, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.5751173708920188, |
|
"grad_norm": 0.36056646704673767, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.74, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5762910798122066, |
|
"grad_norm": 0.3509630262851715, |
|
"learning_rate": 8.547815820543093e-05, |
|
"loss": 0.8006, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.5774647887323944, |
|
"grad_norm": 0.3422422707080841, |
|
"learning_rate": 8.524203069657616e-05, |
|
"loss": 0.7162, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.5786384976525821, |
|
"grad_norm": 0.35553744435310364, |
|
"learning_rate": 8.500590318772137e-05, |
|
"loss": 0.7554, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.57981220657277, |
|
"grad_norm": 0.3443603813648224, |
|
"learning_rate": 8.47697756788666e-05, |
|
"loss": 0.7128, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5809859154929577, |
|
"grad_norm": 0.3314555883407593, |
|
"learning_rate": 8.453364817001182e-05, |
|
"loss": 0.7123, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5821596244131455, |
|
"grad_norm": 0.33951112627983093, |
|
"learning_rate": 8.429752066115702e-05, |
|
"loss": 0.7501, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 0.327809602022171, |
|
"learning_rate": 8.406139315230225e-05, |
|
"loss": 0.7543, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.5845070422535211, |
|
"grad_norm": 0.33205023407936096, |
|
"learning_rate": 8.382526564344747e-05, |
|
"loss": 0.7395, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5856807511737089, |
|
"grad_norm": 0.3762659430503845, |
|
"learning_rate": 8.358913813459269e-05, |
|
"loss": 0.7424, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5868544600938967, |
|
"grad_norm": 0.3421575427055359, |
|
"learning_rate": 8.33530106257379e-05, |
|
"loss": 0.7167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5880281690140845, |
|
"grad_norm": 0.3560996353626251, |
|
"learning_rate": 8.311688311688312e-05, |
|
"loss": 0.7464, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.5892018779342723, |
|
"grad_norm": 0.3566039800643921, |
|
"learning_rate": 8.288075560802834e-05, |
|
"loss": 0.715, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.5903755868544601, |
|
"grad_norm": 0.3481593430042267, |
|
"learning_rate": 8.264462809917356e-05, |
|
"loss": 0.7506, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.5915492957746479, |
|
"grad_norm": 0.34428590536117554, |
|
"learning_rate": 8.240850059031878e-05, |
|
"loss": 0.7272, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.5927230046948356, |
|
"grad_norm": 0.35629555583000183, |
|
"learning_rate": 8.2172373081464e-05, |
|
"loss": 0.7334, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5938967136150235, |
|
"grad_norm": 0.37292811274528503, |
|
"learning_rate": 8.193624557260921e-05, |
|
"loss": 0.7505, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.5950704225352113, |
|
"grad_norm": 0.359614759683609, |
|
"learning_rate": 8.170011806375443e-05, |
|
"loss": 0.8006, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.596244131455399, |
|
"grad_norm": 0.3388945460319519, |
|
"learning_rate": 8.146399055489965e-05, |
|
"loss": 0.7542, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.5974178403755869, |
|
"grad_norm": 0.3528054356575012, |
|
"learning_rate": 8.122786304604487e-05, |
|
"loss": 0.7412, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.5985915492957746, |
|
"grad_norm": 0.3354608416557312, |
|
"learning_rate": 8.099173553719009e-05, |
|
"loss": 0.7062, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5997652582159625, |
|
"grad_norm": 0.35168859362602234, |
|
"learning_rate": 8.07556080283353e-05, |
|
"loss": 0.7653, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.6009389671361502, |
|
"grad_norm": 0.33843398094177246, |
|
"learning_rate": 8.051948051948052e-05, |
|
"loss": 0.7339, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.602112676056338, |
|
"grad_norm": 0.32910212874412537, |
|
"learning_rate": 8.028335301062574e-05, |
|
"loss": 0.6966, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.6032863849765259, |
|
"grad_norm": 0.3462936580181122, |
|
"learning_rate": 8.004722550177096e-05, |
|
"loss": 0.7386, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.6044600938967136, |
|
"grad_norm": 0.3483426868915558, |
|
"learning_rate": 7.981109799291619e-05, |
|
"loss": 0.7548, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.6056338028169014, |
|
"grad_norm": 0.3555918335914612, |
|
"learning_rate": 7.95749704840614e-05, |
|
"loss": 0.7144, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.6068075117370892, |
|
"grad_norm": 0.3545628786087036, |
|
"learning_rate": 7.933884297520661e-05, |
|
"loss": 0.7601, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.607981220657277, |
|
"grad_norm": 0.3554907441139221, |
|
"learning_rate": 7.910271546635183e-05, |
|
"loss": 0.7464, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.6091549295774648, |
|
"grad_norm": 0.3457619547843933, |
|
"learning_rate": 7.886658795749705e-05, |
|
"loss": 0.7372, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.6103286384976526, |
|
"grad_norm": 0.3450148105621338, |
|
"learning_rate": 7.863046044864228e-05, |
|
"loss": 0.7265, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6115023474178404, |
|
"grad_norm": 0.3475225567817688, |
|
"learning_rate": 7.839433293978748e-05, |
|
"loss": 0.798, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.6126760563380281, |
|
"grad_norm": 0.34560921788215637, |
|
"learning_rate": 7.81582054309327e-05, |
|
"loss": 0.7583, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.613849765258216, |
|
"grad_norm": 0.33480820059776306, |
|
"learning_rate": 7.792207792207793e-05, |
|
"loss": 0.7658, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.6150234741784038, |
|
"grad_norm": 0.34581395983695984, |
|
"learning_rate": 7.768595041322314e-05, |
|
"loss": 0.7368, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6161971830985915, |
|
"grad_norm": 0.35383906960487366, |
|
"learning_rate": 7.744982290436837e-05, |
|
"loss": 0.7963, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.6173708920187794, |
|
"grad_norm": 0.352117121219635, |
|
"learning_rate": 7.721369539551359e-05, |
|
"loss": 0.7589, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.6185446009389671, |
|
"grad_norm": 0.34420257806777954, |
|
"learning_rate": 7.697756788665879e-05, |
|
"loss": 0.7209, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.6197183098591549, |
|
"grad_norm": 0.3449562191963196, |
|
"learning_rate": 7.674144037780402e-05, |
|
"loss": 0.7526, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6208920187793427, |
|
"grad_norm": 0.37377694249153137, |
|
"learning_rate": 7.650531286894924e-05, |
|
"loss": 0.7348, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.6220657276995305, |
|
"grad_norm": 0.32662031054496765, |
|
"learning_rate": 7.626918536009446e-05, |
|
"loss": 0.7125, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6232394366197183, |
|
"grad_norm": 0.3551415801048279, |
|
"learning_rate": 7.603305785123968e-05, |
|
"loss": 0.7497, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.6244131455399061, |
|
"grad_norm": 0.3519802689552307, |
|
"learning_rate": 7.579693034238488e-05, |
|
"loss": 0.7864, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6255868544600939, |
|
"grad_norm": 0.3773750364780426, |
|
"learning_rate": 7.556080283353011e-05, |
|
"loss": 0.7681, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.6267605633802817, |
|
"grad_norm": 0.3558037281036377, |
|
"learning_rate": 7.532467532467533e-05, |
|
"loss": 0.7392, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6279342723004695, |
|
"grad_norm": 0.33910447359085083, |
|
"learning_rate": 7.508854781582055e-05, |
|
"loss": 0.7036, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.6291079812206573, |
|
"grad_norm": 0.35620275139808655, |
|
"learning_rate": 7.485242030696577e-05, |
|
"loss": 0.7272, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6302816901408451, |
|
"grad_norm": 0.3377542495727539, |
|
"learning_rate": 7.461629279811098e-05, |
|
"loss": 0.7244, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.6314553990610329, |
|
"grad_norm": 0.35217198729515076, |
|
"learning_rate": 7.43801652892562e-05, |
|
"loss": 0.7655, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.6326291079812206, |
|
"grad_norm": 0.34656718373298645, |
|
"learning_rate": 7.414403778040142e-05, |
|
"loss": 0.7474, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.6338028169014085, |
|
"grad_norm": 0.34429579973220825, |
|
"learning_rate": 7.390791027154664e-05, |
|
"loss": 0.7333, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6349765258215962, |
|
"grad_norm": 0.374262273311615, |
|
"learning_rate": 7.367178276269186e-05, |
|
"loss": 0.7876, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.636150234741784, |
|
"grad_norm": 0.363299161195755, |
|
"learning_rate": 7.343565525383707e-05, |
|
"loss": 0.7784, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.6373239436619719, |
|
"grad_norm": 0.36767125129699707, |
|
"learning_rate": 7.31995277449823e-05, |
|
"loss": 0.7329, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.6384976525821596, |
|
"grad_norm": 0.3338686525821686, |
|
"learning_rate": 7.296340023612751e-05, |
|
"loss": 0.7737, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.6396713615023474, |
|
"grad_norm": 0.3493046164512634, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.7461, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.6408450704225352, |
|
"grad_norm": 0.3691573441028595, |
|
"learning_rate": 7.249114521841795e-05, |
|
"loss": 0.765, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.642018779342723, |
|
"grad_norm": 0.3573099374771118, |
|
"learning_rate": 7.225501770956316e-05, |
|
"loss": 0.7589, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.6431924882629108, |
|
"grad_norm": 0.36218926310539246, |
|
"learning_rate": 7.201889020070838e-05, |
|
"loss": 0.7314, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.6443661971830986, |
|
"grad_norm": 0.35753628611564636, |
|
"learning_rate": 7.17827626918536e-05, |
|
"loss": 0.7564, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.6455399061032864, |
|
"grad_norm": 0.3394756615161896, |
|
"learning_rate": 7.154663518299882e-05, |
|
"loss": 0.7162, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6467136150234741, |
|
"grad_norm": 0.350090891122818, |
|
"learning_rate": 7.131050767414405e-05, |
|
"loss": 0.7561, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.647887323943662, |
|
"grad_norm": 0.328924298286438, |
|
"learning_rate": 7.107438016528925e-05, |
|
"loss": 0.7143, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6490610328638498, |
|
"grad_norm": 0.3552818298339844, |
|
"learning_rate": 7.083825265643447e-05, |
|
"loss": 0.7264, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.6502347417840375, |
|
"grad_norm": 0.3504960536956787, |
|
"learning_rate": 7.06021251475797e-05, |
|
"loss": 0.7512, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.6514084507042254, |
|
"grad_norm": 0.33755823969841003, |
|
"learning_rate": 7.036599763872491e-05, |
|
"loss": 0.7621, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.6525821596244131, |
|
"grad_norm": 0.35977354645729065, |
|
"learning_rate": 7.012987012987014e-05, |
|
"loss": 0.776, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.653755868544601, |
|
"grad_norm": 0.37304726243019104, |
|
"learning_rate": 6.989374262101536e-05, |
|
"loss": 0.7601, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.6549295774647887, |
|
"grad_norm": 0.3569071590900421, |
|
"learning_rate": 6.965761511216056e-05, |
|
"loss": 0.7303, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.6561032863849765, |
|
"grad_norm": 0.348264217376709, |
|
"learning_rate": 6.94214876033058e-05, |
|
"loss": 0.759, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.6572769953051644, |
|
"grad_norm": 0.3501366674900055, |
|
"learning_rate": 6.9185360094451e-05, |
|
"loss": 0.7588, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6584507042253521, |
|
"grad_norm": 0.3633224666118622, |
|
"learning_rate": 6.894923258559623e-05, |
|
"loss": 0.7741, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.6596244131455399, |
|
"grad_norm": 0.35944506525993347, |
|
"learning_rate": 6.871310507674145e-05, |
|
"loss": 0.756, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6607981220657277, |
|
"grad_norm": 0.3479359745979309, |
|
"learning_rate": 6.847697756788665e-05, |
|
"loss": 0.7292, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.6619718309859155, |
|
"grad_norm": 0.37013959884643555, |
|
"learning_rate": 6.824085005903188e-05, |
|
"loss": 0.7618, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6631455399061033, |
|
"grad_norm": 0.36679190397262573, |
|
"learning_rate": 6.80047225501771e-05, |
|
"loss": 0.7797, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.6643192488262911, |
|
"grad_norm": 0.35092490911483765, |
|
"learning_rate": 6.776859504132232e-05, |
|
"loss": 0.705, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.6654929577464789, |
|
"grad_norm": 0.3594275712966919, |
|
"learning_rate": 6.753246753246754e-05, |
|
"loss": 0.7215, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.3503059148788452, |
|
"learning_rate": 6.729634002361276e-05, |
|
"loss": 0.7248, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.6678403755868545, |
|
"grad_norm": 0.35919633507728577, |
|
"learning_rate": 6.706021251475797e-05, |
|
"loss": 0.7718, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.6690140845070423, |
|
"grad_norm": 0.36752262711524963, |
|
"learning_rate": 6.682408500590319e-05, |
|
"loss": 0.7738, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.67018779342723, |
|
"grad_norm": 0.33812567591667175, |
|
"learning_rate": 6.658795749704841e-05, |
|
"loss": 0.7846, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.6713615023474179, |
|
"grad_norm": 0.3429810404777527, |
|
"learning_rate": 6.635182998819363e-05, |
|
"loss": 0.7371, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.6725352112676056, |
|
"grad_norm": 0.3457571864128113, |
|
"learning_rate": 6.611570247933885e-05, |
|
"loss": 0.7318, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.6737089201877934, |
|
"grad_norm": 0.3476294279098511, |
|
"learning_rate": 6.587957497048406e-05, |
|
"loss": 0.7344, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6748826291079812, |
|
"grad_norm": 0.34464409947395325, |
|
"learning_rate": 6.564344746162928e-05, |
|
"loss": 0.7429, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.676056338028169, |
|
"grad_norm": 0.34444373846054077, |
|
"learning_rate": 6.54073199527745e-05, |
|
"loss": 0.7663, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.6772300469483568, |
|
"grad_norm": 0.3656728267669678, |
|
"learning_rate": 6.517119244391972e-05, |
|
"loss": 0.7068, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.6784037558685446, |
|
"grad_norm": 0.3591727614402771, |
|
"learning_rate": 6.493506493506494e-05, |
|
"loss": 0.7481, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.6795774647887324, |
|
"grad_norm": 0.38865676522254944, |
|
"learning_rate": 6.469893742621017e-05, |
|
"loss": 0.7659, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.6807511737089202, |
|
"grad_norm": 0.3438194990158081, |
|
"learning_rate": 6.446280991735537e-05, |
|
"loss": 0.6748, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.681924882629108, |
|
"grad_norm": 0.34979990124702454, |
|
"learning_rate": 6.422668240850059e-05, |
|
"loss": 0.7529, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.6830985915492958, |
|
"grad_norm": 0.37309062480926514, |
|
"learning_rate": 6.399055489964582e-05, |
|
"loss": 0.7417, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6842723004694836, |
|
"grad_norm": 0.3737837076187134, |
|
"learning_rate": 6.375442739079102e-05, |
|
"loss": 0.773, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.6854460093896714, |
|
"grad_norm": 0.3397013247013092, |
|
"learning_rate": 6.351829988193624e-05, |
|
"loss": 0.7093, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.6866197183098591, |
|
"grad_norm": 0.37165701389312744, |
|
"learning_rate": 6.328217237308147e-05, |
|
"loss": 0.7078, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.687793427230047, |
|
"grad_norm": 0.3533116579055786, |
|
"learning_rate": 6.304604486422668e-05, |
|
"loss": 0.7105, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.6889671361502347, |
|
"grad_norm": 0.35352569818496704, |
|
"learning_rate": 6.280991735537191e-05, |
|
"loss": 0.7282, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.6901408450704225, |
|
"grad_norm": 0.3754810094833374, |
|
"learning_rate": 6.257378984651711e-05, |
|
"loss": 0.7364, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.6913145539906104, |
|
"grad_norm": 0.36235493421554565, |
|
"learning_rate": 6.233766233766233e-05, |
|
"loss": 0.7024, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.6924882629107981, |
|
"grad_norm": 0.3446933627128601, |
|
"learning_rate": 6.210153482880756e-05, |
|
"loss": 0.7392, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6936619718309859, |
|
"grad_norm": 0.34918078780174255, |
|
"learning_rate": 6.186540731995277e-05, |
|
"loss": 0.6716, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.6948356807511737, |
|
"grad_norm": 0.3438567519187927, |
|
"learning_rate": 6.1629279811098e-05, |
|
"loss": 0.7812, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.6960093896713615, |
|
"grad_norm": 0.346626341342926, |
|
"learning_rate": 6.139315230224322e-05, |
|
"loss": 0.7538, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.6971830985915493, |
|
"grad_norm": 0.3506343960762024, |
|
"learning_rate": 6.115702479338842e-05, |
|
"loss": 0.7434, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.6983568075117371, |
|
"grad_norm": 0.35403555631637573, |
|
"learning_rate": 6.0920897284533654e-05, |
|
"loss": 0.7333, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6995305164319249, |
|
"grad_norm": 0.3391430377960205, |
|
"learning_rate": 6.068476977567887e-05, |
|
"loss": 0.7486, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.7007042253521126, |
|
"grad_norm": 0.33783578872680664, |
|
"learning_rate": 6.044864226682408e-05, |
|
"loss": 0.7588, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.7018779342723005, |
|
"grad_norm": 0.3333738446235657, |
|
"learning_rate": 6.021251475796931e-05, |
|
"loss": 0.7268, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.7030516431924883, |
|
"grad_norm": 0.3494018316268921, |
|
"learning_rate": 5.997638724911453e-05, |
|
"loss": 0.7363, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 0.34416642785072327, |
|
"learning_rate": 5.9740259740259744e-05, |
|
"loss": 0.7322, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7053990610328639, |
|
"grad_norm": 0.3523387312889099, |
|
"learning_rate": 5.950413223140496e-05, |
|
"loss": 0.6986, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.7065727699530516, |
|
"grad_norm": 0.33000361919403076, |
|
"learning_rate": 5.926800472255017e-05, |
|
"loss": 0.7535, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.7077464788732394, |
|
"grad_norm": 0.33932214975357056, |
|
"learning_rate": 5.90318772136954e-05, |
|
"loss": 0.7051, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.7089201877934272, |
|
"grad_norm": 0.3373797833919525, |
|
"learning_rate": 5.8795749704840616e-05, |
|
"loss": 0.7022, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.710093896713615, |
|
"grad_norm": 0.35239875316619873, |
|
"learning_rate": 5.855962219598583e-05, |
|
"loss": 0.7893, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.7112676056338029, |
|
"grad_norm": 0.36973506212234497, |
|
"learning_rate": 5.832349468713105e-05, |
|
"loss": 0.7157, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.7124413145539906, |
|
"grad_norm": 0.3447434604167938, |
|
"learning_rate": 5.8087367178276277e-05, |
|
"loss": 0.7306, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.7136150234741784, |
|
"grad_norm": 0.36380118131637573, |
|
"learning_rate": 5.785123966942149e-05, |
|
"loss": 0.7238, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.7147887323943662, |
|
"grad_norm": 0.33784252405166626, |
|
"learning_rate": 5.7615112160566706e-05, |
|
"loss": 0.6792, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.715962441314554, |
|
"grad_norm": 0.34995025396347046, |
|
"learning_rate": 5.737898465171193e-05, |
|
"loss": 0.7158, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7171361502347418, |
|
"grad_norm": 0.3586655259132385, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 0.7345, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.7183098591549296, |
|
"grad_norm": 0.3490711450576782, |
|
"learning_rate": 5.6906729634002366e-05, |
|
"loss": 0.759, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.7194835680751174, |
|
"grad_norm": 0.3405636250972748, |
|
"learning_rate": 5.6670602125147584e-05, |
|
"loss": 0.7069, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.7206572769953051, |
|
"grad_norm": 0.3362460136413574, |
|
"learning_rate": 5.6434474616292796e-05, |
|
"loss": 0.7413, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.721830985915493, |
|
"grad_norm": 0.3571033775806427, |
|
"learning_rate": 5.619834710743802e-05, |
|
"loss": 0.7138, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.7230046948356808, |
|
"grad_norm": 0.33801379799842834, |
|
"learning_rate": 5.5962219598583245e-05, |
|
"loss": 0.7004, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.7241784037558685, |
|
"grad_norm": 0.350063294172287, |
|
"learning_rate": 5.5726092089728456e-05, |
|
"loss": 0.7342, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.7253521126760564, |
|
"grad_norm": 0.3471220135688782, |
|
"learning_rate": 5.5489964580873674e-05, |
|
"loss": 0.7591, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.7265258215962441, |
|
"grad_norm": 0.3600592613220215, |
|
"learning_rate": 5.5253837072018886e-05, |
|
"loss": 0.7427, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.7276995305164319, |
|
"grad_norm": 0.34294822812080383, |
|
"learning_rate": 5.501770956316411e-05, |
|
"loss": 0.7085, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7288732394366197, |
|
"grad_norm": 0.3481101989746094, |
|
"learning_rate": 5.4781582054309335e-05, |
|
"loss": 0.7465, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.7300469483568075, |
|
"grad_norm": 0.3402861952781677, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.7613, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.7312206572769953, |
|
"grad_norm": 0.3475019335746765, |
|
"learning_rate": 5.4309327036599764e-05, |
|
"loss": 0.775, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.7323943661971831, |
|
"grad_norm": 0.34003034234046936, |
|
"learning_rate": 5.407319952774499e-05, |
|
"loss": 0.6817, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7335680751173709, |
|
"grad_norm": 0.33620044589042664, |
|
"learning_rate": 5.38370720188902e-05, |
|
"loss": 0.7392, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.7347417840375586, |
|
"grad_norm": 0.34645119309425354, |
|
"learning_rate": 5.3600944510035425e-05, |
|
"loss": 0.717, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.7359154929577465, |
|
"grad_norm": 0.3485560417175293, |
|
"learning_rate": 5.336481700118064e-05, |
|
"loss": 0.7361, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.7370892018779343, |
|
"grad_norm": 0.36997392773628235, |
|
"learning_rate": 5.3128689492325854e-05, |
|
"loss": 0.7264, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.7382629107981221, |
|
"grad_norm": 0.3379404842853546, |
|
"learning_rate": 5.289256198347108e-05, |
|
"loss": 0.7303, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.7394366197183099, |
|
"grad_norm": 0.3385223150253296, |
|
"learning_rate": 5.2656434474616304e-05, |
|
"loss": 0.7174, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7406103286384976, |
|
"grad_norm": 0.38303306698799133, |
|
"learning_rate": 5.2420306965761515e-05, |
|
"loss": 0.7539, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.7417840375586855, |
|
"grad_norm": 0.3544706404209137, |
|
"learning_rate": 5.218417945690673e-05, |
|
"loss": 0.7108, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.7429577464788732, |
|
"grad_norm": 0.35137131810188293, |
|
"learning_rate": 5.1948051948051944e-05, |
|
"loss": 0.7184, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.744131455399061, |
|
"grad_norm": 0.35326629877090454, |
|
"learning_rate": 5.171192443919717e-05, |
|
"loss": 0.7114, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.7453051643192489, |
|
"grad_norm": 0.35051414370536804, |
|
"learning_rate": 5.1475796930342393e-05, |
|
"loss": 0.6966, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.7464788732394366, |
|
"grad_norm": 0.37491628527641296, |
|
"learning_rate": 5.1239669421487605e-05, |
|
"loss": 0.7061, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.7476525821596244, |
|
"grad_norm": 0.37242433428764343, |
|
"learning_rate": 5.100354191263282e-05, |
|
"loss": 0.6904, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.7488262910798122, |
|
"grad_norm": 0.376429945230484, |
|
"learning_rate": 5.076741440377805e-05, |
|
"loss": 0.7203, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.34106218814849854, |
|
"learning_rate": 5.053128689492326e-05, |
|
"loss": 0.6878, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.7511737089201878, |
|
"grad_norm": 0.37987956404685974, |
|
"learning_rate": 5.029515938606848e-05, |
|
"loss": 0.7835, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7523474178403756, |
|
"grad_norm": 0.355932354927063, |
|
"learning_rate": 5.00590318772137e-05, |
|
"loss": 0.7382, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.7535211267605634, |
|
"grad_norm": 0.33495378494262695, |
|
"learning_rate": 4.982290436835891e-05, |
|
"loss": 0.7244, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.7546948356807511, |
|
"grad_norm": 0.36573663353919983, |
|
"learning_rate": 4.958677685950414e-05, |
|
"loss": 0.7339, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.755868544600939, |
|
"grad_norm": 0.34233418107032776, |
|
"learning_rate": 4.9350649350649355e-05, |
|
"loss": 0.7303, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.7570422535211268, |
|
"grad_norm": 0.36358365416526794, |
|
"learning_rate": 4.9114521841794566e-05, |
|
"loss": 0.7169, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.7582159624413145, |
|
"grad_norm": 0.3423750400543213, |
|
"learning_rate": 4.887839433293979e-05, |
|
"loss": 0.7413, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.7593896713615024, |
|
"grad_norm": 0.34080007672309875, |
|
"learning_rate": 4.864226682408501e-05, |
|
"loss": 0.7319, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.7605633802816901, |
|
"grad_norm": 0.35408544540405273, |
|
"learning_rate": 4.840613931523023e-05, |
|
"loss": 0.6895, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.7617370892018779, |
|
"grad_norm": 0.34515753388404846, |
|
"learning_rate": 4.8170011806375445e-05, |
|
"loss": 0.7181, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.7629107981220657, |
|
"grad_norm": 0.3446560502052307, |
|
"learning_rate": 4.793388429752066e-05, |
|
"loss": 0.7156, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7640845070422535, |
|
"grad_norm": 0.3451150357723236, |
|
"learning_rate": 4.769775678866588e-05, |
|
"loss": 0.7232, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.7652582159624414, |
|
"grad_norm": 0.357740193605423, |
|
"learning_rate": 4.74616292798111e-05, |
|
"loss": 0.6872, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.7664319248826291, |
|
"grad_norm": 0.3685015141963959, |
|
"learning_rate": 4.7225501770956324e-05, |
|
"loss": 0.735, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.7676056338028169, |
|
"grad_norm": 0.3503192961215973, |
|
"learning_rate": 4.6989374262101535e-05, |
|
"loss": 0.7336, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.7687793427230047, |
|
"grad_norm": 0.33453887701034546, |
|
"learning_rate": 4.675324675324675e-05, |
|
"loss": 0.7101, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.7699530516431925, |
|
"grad_norm": 0.3708442747592926, |
|
"learning_rate": 4.651711924439197e-05, |
|
"loss": 0.7153, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.7711267605633803, |
|
"grad_norm": 0.3736172318458557, |
|
"learning_rate": 4.6280991735537196e-05, |
|
"loss": 0.7071, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.7723004694835681, |
|
"grad_norm": 0.35988256335258484, |
|
"learning_rate": 4.6044864226682414e-05, |
|
"loss": 0.7285, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.7734741784037559, |
|
"grad_norm": 0.34314337372779846, |
|
"learning_rate": 4.5808736717827625e-05, |
|
"loss": 0.7137, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.7746478873239436, |
|
"grad_norm": 0.3723309338092804, |
|
"learning_rate": 4.557260920897285e-05, |
|
"loss": 0.7391, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7758215962441315, |
|
"grad_norm": 0.3581268787384033, |
|
"learning_rate": 4.533648170011807e-05, |
|
"loss": 0.7157, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.7769953051643192, |
|
"grad_norm": 0.36784443259239197, |
|
"learning_rate": 4.5100354191263286e-05, |
|
"loss": 0.6865, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.778169014084507, |
|
"grad_norm": 0.36377546191215515, |
|
"learning_rate": 4.48642266824085e-05, |
|
"loss": 0.7437, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.7793427230046949, |
|
"grad_norm": 0.349101722240448, |
|
"learning_rate": 4.462809917355372e-05, |
|
"loss": 0.7226, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.7805164319248826, |
|
"grad_norm": 0.36608216166496277, |
|
"learning_rate": 4.439197166469894e-05, |
|
"loss": 0.7543, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7816901408450704, |
|
"grad_norm": 0.3495696783065796, |
|
"learning_rate": 4.415584415584416e-05, |
|
"loss": 0.708, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.7828638497652582, |
|
"grad_norm": 0.3664140999317169, |
|
"learning_rate": 4.3919716646989375e-05, |
|
"loss": 0.7225, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.784037558685446, |
|
"grad_norm": 0.3560849726200104, |
|
"learning_rate": 4.368358913813459e-05, |
|
"loss": 0.6972, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.7852112676056338, |
|
"grad_norm": 0.3571857511997223, |
|
"learning_rate": 4.344746162927981e-05, |
|
"loss": 0.694, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.7863849765258216, |
|
"grad_norm": 0.37072160840034485, |
|
"learning_rate": 4.321133412042503e-05, |
|
"loss": 0.7202, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7875586854460094, |
|
"grad_norm": 0.354948490858078, |
|
"learning_rate": 4.2975206611570254e-05, |
|
"loss": 0.7481, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.7887323943661971, |
|
"grad_norm": 0.3736347258090973, |
|
"learning_rate": 4.2739079102715465e-05, |
|
"loss": 0.7261, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.789906103286385, |
|
"grad_norm": 0.3690294623374939, |
|
"learning_rate": 4.250295159386068e-05, |
|
"loss": 0.7529, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.7910798122065728, |
|
"grad_norm": 0.354192316532135, |
|
"learning_rate": 4.226682408500591e-05, |
|
"loss": 0.7176, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.7922535211267606, |
|
"grad_norm": 0.355185866355896, |
|
"learning_rate": 4.2030696576151126e-05, |
|
"loss": 0.7099, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7934272300469484, |
|
"grad_norm": 0.3503565490245819, |
|
"learning_rate": 4.1794569067296344e-05, |
|
"loss": 0.7072, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.7946009389671361, |
|
"grad_norm": 0.3727845549583435, |
|
"learning_rate": 4.155844155844156e-05, |
|
"loss": 0.7334, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.795774647887324, |
|
"grad_norm": 0.33894312381744385, |
|
"learning_rate": 4.132231404958678e-05, |
|
"loss": 0.6946, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.7969483568075117, |
|
"grad_norm": 0.3385523855686188, |
|
"learning_rate": 4.1086186540732e-05, |
|
"loss": 0.7096, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.7981220657276995, |
|
"grad_norm": 0.3488437235355377, |
|
"learning_rate": 4.0850059031877216e-05, |
|
"loss": 0.6942, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7992957746478874, |
|
"grad_norm": 0.34666576981544495, |
|
"learning_rate": 4.0613931523022434e-05, |
|
"loss": 0.7329, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.8004694835680751, |
|
"grad_norm": 0.3557136356830597, |
|
"learning_rate": 4.037780401416765e-05, |
|
"loss": 0.7655, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.8016431924882629, |
|
"grad_norm": 0.3647683262825012, |
|
"learning_rate": 4.014167650531287e-05, |
|
"loss": 0.7578, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.8028169014084507, |
|
"grad_norm": 0.3452191650867462, |
|
"learning_rate": 3.9905548996458095e-05, |
|
"loss": 0.7145, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.8039906103286385, |
|
"grad_norm": 0.3540481925010681, |
|
"learning_rate": 3.9669421487603306e-05, |
|
"loss": 0.7347, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.8051643192488263, |
|
"grad_norm": 0.3536418378353119, |
|
"learning_rate": 3.9433293978748524e-05, |
|
"loss": 0.7103, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.8063380281690141, |
|
"grad_norm": 0.34728798270225525, |
|
"learning_rate": 3.919716646989374e-05, |
|
"loss": 0.7376, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.8075117370892019, |
|
"grad_norm": 0.354643851518631, |
|
"learning_rate": 3.8961038961038966e-05, |
|
"loss": 0.7223, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.8086854460093896, |
|
"grad_norm": 0.3438583016395569, |
|
"learning_rate": 3.8724911452184184e-05, |
|
"loss": 0.6906, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.8098591549295775, |
|
"grad_norm": 0.34713107347488403, |
|
"learning_rate": 3.8488783943329396e-05, |
|
"loss": 0.7361, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.8110328638497653, |
|
"grad_norm": 0.3483150005340576, |
|
"learning_rate": 3.825265643447462e-05, |
|
"loss": 0.7016, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.812206572769953, |
|
"grad_norm": 0.34848445653915405, |
|
"learning_rate": 3.801652892561984e-05, |
|
"loss": 0.6966, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.8133802816901409, |
|
"grad_norm": 0.34223318099975586, |
|
"learning_rate": 3.7780401416765056e-05, |
|
"loss": 0.7088, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.8145539906103286, |
|
"grad_norm": 0.33693239092826843, |
|
"learning_rate": 3.7544273907910274e-05, |
|
"loss": 0.7108, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.8157276995305164, |
|
"grad_norm": 0.34613272547721863, |
|
"learning_rate": 3.730814639905549e-05, |
|
"loss": 0.7075, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.8169014084507042, |
|
"grad_norm": 0.3430733382701874, |
|
"learning_rate": 3.707201889020071e-05, |
|
"loss": 0.7246, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.818075117370892, |
|
"grad_norm": 0.35237351059913635, |
|
"learning_rate": 3.683589138134593e-05, |
|
"loss": 0.6918, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.8192488262910798, |
|
"grad_norm": 0.3375650644302368, |
|
"learning_rate": 3.659976387249115e-05, |
|
"loss": 0.6978, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.8204225352112676, |
|
"grad_norm": 0.3585062026977539, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.7241, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.8215962441314554, |
|
"grad_norm": 0.35660460591316223, |
|
"learning_rate": 3.612750885478158e-05, |
|
"loss": 0.6946, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8227699530516432, |
|
"grad_norm": 0.3468845784664154, |
|
"learning_rate": 3.58913813459268e-05, |
|
"loss": 0.7535, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.823943661971831, |
|
"grad_norm": 0.365291029214859, |
|
"learning_rate": 3.5655253837072025e-05, |
|
"loss": 0.7438, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.8251173708920188, |
|
"grad_norm": 0.353506863117218, |
|
"learning_rate": 3.5419126328217236e-05, |
|
"loss": 0.7359, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.8262910798122066, |
|
"grad_norm": 0.381610244512558, |
|
"learning_rate": 3.5182998819362454e-05, |
|
"loss": 0.7821, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.8274647887323944, |
|
"grad_norm": 0.37710806727409363, |
|
"learning_rate": 3.494687131050768e-05, |
|
"loss": 0.7349, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.8286384976525821, |
|
"grad_norm": 0.361545592546463, |
|
"learning_rate": 3.47107438016529e-05, |
|
"loss": 0.7229, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.82981220657277, |
|
"grad_norm": 0.3615299463272095, |
|
"learning_rate": 3.4474616292798115e-05, |
|
"loss": 0.748, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.8309859154929577, |
|
"grad_norm": 0.3437252342700958, |
|
"learning_rate": 3.4238488783943326e-05, |
|
"loss": 0.7165, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.8321596244131455, |
|
"grad_norm": 0.35603129863739014, |
|
"learning_rate": 3.400236127508855e-05, |
|
"loss": 0.7373, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.3586898446083069, |
|
"learning_rate": 3.376623376623377e-05, |
|
"loss": 0.7056, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8345070422535211, |
|
"grad_norm": 0.3558507263660431, |
|
"learning_rate": 3.353010625737899e-05, |
|
"loss": 0.742, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.8356807511737089, |
|
"grad_norm": 0.3359735608100891, |
|
"learning_rate": 3.3293978748524205e-05, |
|
"loss": 0.6994, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.8368544600938967, |
|
"grad_norm": 0.34250345826148987, |
|
"learning_rate": 3.305785123966942e-05, |
|
"loss": 0.6762, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.8380281690140845, |
|
"grad_norm": 0.38417667150497437, |
|
"learning_rate": 3.282172373081464e-05, |
|
"loss": 0.7213, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.8392018779342723, |
|
"grad_norm": 0.3643978536128998, |
|
"learning_rate": 3.258559622195986e-05, |
|
"loss": 0.6884, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.8403755868544601, |
|
"grad_norm": 0.3544299602508545, |
|
"learning_rate": 3.234946871310508e-05, |
|
"loss": 0.712, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.8415492957746479, |
|
"grad_norm": 0.36903661489486694, |
|
"learning_rate": 3.2113341204250294e-05, |
|
"loss": 0.7227, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.8427230046948356, |
|
"grad_norm": 0.3557377755641937, |
|
"learning_rate": 3.187721369539551e-05, |
|
"loss": 0.6904, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.8438967136150235, |
|
"grad_norm": 0.36762547492980957, |
|
"learning_rate": 3.164108618654074e-05, |
|
"loss": 0.7469, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"grad_norm": 0.35805556178092957, |
|
"learning_rate": 3.1404958677685955e-05, |
|
"loss": 0.7443, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.846244131455399, |
|
"grad_norm": 0.38130536675453186, |
|
"learning_rate": 3.1168831168831166e-05, |
|
"loss": 0.7664, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.8474178403755869, |
|
"grad_norm": 0.3599521219730377, |
|
"learning_rate": 3.0932703659976384e-05, |
|
"loss": 0.7065, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.8485915492957746, |
|
"grad_norm": 0.3531062602996826, |
|
"learning_rate": 3.069657615112161e-05, |
|
"loss": 0.7451, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.8497652582159625, |
|
"grad_norm": 0.36916878819465637, |
|
"learning_rate": 3.0460448642266827e-05, |
|
"loss": 0.7077, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.8509389671361502, |
|
"grad_norm": 0.38139578700065613, |
|
"learning_rate": 3.022432113341204e-05, |
|
"loss": 0.7452, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.852112676056338, |
|
"grad_norm": 0.337944358587265, |
|
"learning_rate": 2.9988193624557266e-05, |
|
"loss": 0.6596, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.8532863849765259, |
|
"grad_norm": 0.36196213960647583, |
|
"learning_rate": 2.975206611570248e-05, |
|
"loss": 0.7081, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.8544600938967136, |
|
"grad_norm": 0.34913602471351624, |
|
"learning_rate": 2.95159386068477e-05, |
|
"loss": 0.6901, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.8556338028169014, |
|
"grad_norm": 0.343414843082428, |
|
"learning_rate": 2.9279811097992914e-05, |
|
"loss": 0.675, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.8568075117370892, |
|
"grad_norm": 0.3704102039337158, |
|
"learning_rate": 2.9043683589138138e-05, |
|
"loss": 0.7566, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.857981220657277, |
|
"grad_norm": 0.3464911878108978, |
|
"learning_rate": 2.8807556080283353e-05, |
|
"loss": 0.6872, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.8591549295774648, |
|
"grad_norm": 0.3615940511226654, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.7755, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.8603286384976526, |
|
"grad_norm": 0.35284191370010376, |
|
"learning_rate": 2.8335301062573792e-05, |
|
"loss": 0.7483, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.8615023474178404, |
|
"grad_norm": 0.3469059467315674, |
|
"learning_rate": 2.809917355371901e-05, |
|
"loss": 0.6902, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.8626760563380281, |
|
"grad_norm": 0.35148003697395325, |
|
"learning_rate": 2.7863046044864228e-05, |
|
"loss": 0.732, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.863849765258216, |
|
"grad_norm": 0.3533206880092621, |
|
"learning_rate": 2.7626918536009443e-05, |
|
"loss": 0.7287, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.8650234741784038, |
|
"grad_norm": 0.383095383644104, |
|
"learning_rate": 2.7390791027154668e-05, |
|
"loss": 0.8017, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.8661971830985915, |
|
"grad_norm": 0.3541397452354431, |
|
"learning_rate": 2.7154663518299882e-05, |
|
"loss": 0.7291, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.8673708920187794, |
|
"grad_norm": 0.35989582538604736, |
|
"learning_rate": 2.69185360094451e-05, |
|
"loss": 0.7211, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.8685446009389671, |
|
"grad_norm": 0.34245404601097107, |
|
"learning_rate": 2.668240850059032e-05, |
|
"loss": 0.7062, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8697183098591549, |
|
"grad_norm": 0.3396112024784088, |
|
"learning_rate": 2.644628099173554e-05, |
|
"loss": 0.6946, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.8708920187793427, |
|
"grad_norm": 0.34901162981987, |
|
"learning_rate": 2.6210153482880757e-05, |
|
"loss": 0.7742, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.8720657276995305, |
|
"grad_norm": 0.3654363453388214, |
|
"learning_rate": 2.5974025974025972e-05, |
|
"loss": 0.7894, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.8732394366197183, |
|
"grad_norm": 0.3478833734989166, |
|
"learning_rate": 2.5737898465171197e-05, |
|
"loss": 0.6909, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.8744131455399061, |
|
"grad_norm": 0.3447161912918091, |
|
"learning_rate": 2.550177095631641e-05, |
|
"loss": 0.7166, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.8755868544600939, |
|
"grad_norm": 0.35436901450157166, |
|
"learning_rate": 2.526564344746163e-05, |
|
"loss": 0.6962, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.8767605633802817, |
|
"grad_norm": 0.3359661400318146, |
|
"learning_rate": 2.502951593860685e-05, |
|
"loss": 0.7345, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.8779342723004695, |
|
"grad_norm": 0.35876211524009705, |
|
"learning_rate": 2.479338842975207e-05, |
|
"loss": 0.6723, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.8791079812206573, |
|
"grad_norm": 0.35507625341415405, |
|
"learning_rate": 2.4557260920897283e-05, |
|
"loss": 0.6744, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.8802816901408451, |
|
"grad_norm": 0.3504907786846161, |
|
"learning_rate": 2.4321133412042505e-05, |
|
"loss": 0.7281, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8814553990610329, |
|
"grad_norm": 0.3498130440711975, |
|
"learning_rate": 2.4085005903187723e-05, |
|
"loss": 0.7079, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.8826291079812206, |
|
"grad_norm": 0.36793026328086853, |
|
"learning_rate": 2.384887839433294e-05, |
|
"loss": 0.747, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.8838028169014085, |
|
"grad_norm": 0.3484232723712921, |
|
"learning_rate": 2.3612750885478162e-05, |
|
"loss": 0.7347, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.8849765258215962, |
|
"grad_norm": 0.34402692317962646, |
|
"learning_rate": 2.3376623376623376e-05, |
|
"loss": 0.6717, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.886150234741784, |
|
"grad_norm": 0.377380907535553, |
|
"learning_rate": 2.3140495867768598e-05, |
|
"loss": 0.7642, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.8873239436619719, |
|
"grad_norm": 0.361382395029068, |
|
"learning_rate": 2.2904368358913812e-05, |
|
"loss": 0.7081, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.8884976525821596, |
|
"grad_norm": 0.3643784821033478, |
|
"learning_rate": 2.2668240850059034e-05, |
|
"loss": 0.7219, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.8896713615023474, |
|
"grad_norm": 0.3974801301956177, |
|
"learning_rate": 2.243211334120425e-05, |
|
"loss": 0.712, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.8908450704225352, |
|
"grad_norm": 0.35573598742485046, |
|
"learning_rate": 2.219598583234947e-05, |
|
"loss": 0.7335, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.892018779342723, |
|
"grad_norm": 0.3532857596874237, |
|
"learning_rate": 2.1959858323494688e-05, |
|
"loss": 0.7013, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8931924882629108, |
|
"grad_norm": 0.33362728357315063, |
|
"learning_rate": 2.1723730814639906e-05, |
|
"loss": 0.6739, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.8943661971830986, |
|
"grad_norm": 0.3325813412666321, |
|
"learning_rate": 2.1487603305785127e-05, |
|
"loss": 0.7099, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.8955399061032864, |
|
"grad_norm": 0.3451225459575653, |
|
"learning_rate": 2.125147579693034e-05, |
|
"loss": 0.6959, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.8967136150234741, |
|
"grad_norm": 0.3604796528816223, |
|
"learning_rate": 2.1015348288075563e-05, |
|
"loss": 0.737, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.897887323943662, |
|
"grad_norm": 0.34980282187461853, |
|
"learning_rate": 2.077922077922078e-05, |
|
"loss": 0.7206, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.8990610328638498, |
|
"grad_norm": 0.35130617022514343, |
|
"learning_rate": 2.0543093270366e-05, |
|
"loss": 0.7153, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.9002347417840375, |
|
"grad_norm": 0.34524810314178467, |
|
"learning_rate": 2.0306965761511217e-05, |
|
"loss": 0.7237, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.9014084507042254, |
|
"grad_norm": 0.35661572217941284, |
|
"learning_rate": 2.0070838252656435e-05, |
|
"loss": 0.6831, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.9025821596244131, |
|
"grad_norm": 0.35206255316734314, |
|
"learning_rate": 1.9834710743801653e-05, |
|
"loss": 0.7721, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.903755868544601, |
|
"grad_norm": 0.35439351201057434, |
|
"learning_rate": 1.959858323494687e-05, |
|
"loss": 0.7142, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9049295774647887, |
|
"grad_norm": 0.33722493052482605, |
|
"learning_rate": 1.9362455726092092e-05, |
|
"loss": 0.6968, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.9061032863849765, |
|
"grad_norm": 0.3573172092437744, |
|
"learning_rate": 1.912632821723731e-05, |
|
"loss": 0.7301, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.9072769953051644, |
|
"grad_norm": 0.3347008526325226, |
|
"learning_rate": 1.8890200708382528e-05, |
|
"loss": 0.6721, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.9084507042253521, |
|
"grad_norm": 0.3563063144683838, |
|
"learning_rate": 1.8654073199527746e-05, |
|
"loss": 0.7233, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.9096244131455399, |
|
"grad_norm": 0.35159915685653687, |
|
"learning_rate": 1.8417945690672964e-05, |
|
"loss": 0.7184, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.9107981220657277, |
|
"grad_norm": 0.35826948285102844, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.7301, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.9119718309859155, |
|
"grad_norm": 0.3533133268356323, |
|
"learning_rate": 1.79456906729634e-05, |
|
"loss": 0.7373, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.9131455399061033, |
|
"grad_norm": 0.3495820164680481, |
|
"learning_rate": 1.7709563164108618e-05, |
|
"loss": 0.7379, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.9143192488262911, |
|
"grad_norm": 0.33082085847854614, |
|
"learning_rate": 1.747343565525384e-05, |
|
"loss": 0.6789, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.9154929577464789, |
|
"grad_norm": 0.34669029712677, |
|
"learning_rate": 1.7237308146399057e-05, |
|
"loss": 0.6962, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 0.3401969373226166, |
|
"learning_rate": 1.7001180637544275e-05, |
|
"loss": 0.717, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.9178403755868545, |
|
"grad_norm": 0.3488728702068329, |
|
"learning_rate": 1.6765053128689493e-05, |
|
"loss": 0.7087, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.9190140845070423, |
|
"grad_norm": 0.39244547486305237, |
|
"learning_rate": 1.652892561983471e-05, |
|
"loss": 0.7331, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.92018779342723, |
|
"grad_norm": 0.33185505867004395, |
|
"learning_rate": 1.629279811097993e-05, |
|
"loss": 0.6821, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.9213615023474179, |
|
"grad_norm": 0.34186288714408875, |
|
"learning_rate": 1.6056670602125147e-05, |
|
"loss": 0.6766, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.9225352112676056, |
|
"grad_norm": 0.34512627124786377, |
|
"learning_rate": 1.582054309327037e-05, |
|
"loss": 0.6837, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.9237089201877934, |
|
"grad_norm": 0.34042122960090637, |
|
"learning_rate": 1.5584415584415583e-05, |
|
"loss": 0.7266, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.9248826291079812, |
|
"grad_norm": 0.34173402190208435, |
|
"learning_rate": 1.5348288075560805e-05, |
|
"loss": 0.6998, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.926056338028169, |
|
"grad_norm": 0.34008073806762695, |
|
"learning_rate": 1.511216056670602e-05, |
|
"loss": 0.7211, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.9272300469483568, |
|
"grad_norm": 0.3400252163410187, |
|
"learning_rate": 1.487603305785124e-05, |
|
"loss": 0.6771, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9284037558685446, |
|
"grad_norm": 0.3393029570579529, |
|
"learning_rate": 1.4639905548996457e-05, |
|
"loss": 0.7274, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.9295774647887324, |
|
"grad_norm": 0.3489772379398346, |
|
"learning_rate": 1.4403778040141676e-05, |
|
"loss": 0.7195, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.9307511737089202, |
|
"grad_norm": 0.3434072732925415, |
|
"learning_rate": 1.4167650531286896e-05, |
|
"loss": 0.6806, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.931924882629108, |
|
"grad_norm": 0.35593146085739136, |
|
"learning_rate": 1.3931523022432114e-05, |
|
"loss": 0.7026, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.9330985915492958, |
|
"grad_norm": 0.33654287457466125, |
|
"learning_rate": 1.3695395513577334e-05, |
|
"loss": 0.6655, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.9342723004694836, |
|
"grad_norm": 0.35049983859062195, |
|
"learning_rate": 1.345926800472255e-05, |
|
"loss": 0.686, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.9354460093896714, |
|
"grad_norm": 0.3442087471485138, |
|
"learning_rate": 1.322314049586777e-05, |
|
"loss": 0.7048, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.9366197183098591, |
|
"grad_norm": 0.3569439649581909, |
|
"learning_rate": 1.2987012987012986e-05, |
|
"loss": 0.7271, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.937793427230047, |
|
"grad_norm": 0.3418942391872406, |
|
"learning_rate": 1.2750885478158206e-05, |
|
"loss": 0.7132, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.9389671361502347, |
|
"grad_norm": 0.3399513363838196, |
|
"learning_rate": 1.2514757969303425e-05, |
|
"loss": 0.7046, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9401408450704225, |
|
"grad_norm": 0.34055379033088684, |
|
"learning_rate": 0.00010641553855208948, |
|
"loss": 0.7293, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.9413145539906104, |
|
"grad_norm": 0.3299119770526886, |
|
"learning_rate": 0.0001062978222483814, |
|
"loss": 0.6779, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.9424882629107981, |
|
"grad_norm": 0.3833242356777191, |
|
"learning_rate": 0.00010618010594467334, |
|
"loss": 0.6909, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.9436619718309859, |
|
"grad_norm": 0.39958855509757996, |
|
"learning_rate": 0.00010606238964096529, |
|
"loss": 0.7307, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.9448356807511737, |
|
"grad_norm": 0.38618725538253784, |
|
"learning_rate": 0.00010594467333725722, |
|
"loss": 0.6984, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.9460093896713615, |
|
"grad_norm": 0.4084942936897278, |
|
"learning_rate": 0.00010582695703354914, |
|
"loss": 0.7456, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.9471830985915493, |
|
"grad_norm": 0.4109421372413635, |
|
"learning_rate": 0.00010570924072984109, |
|
"loss": 0.6991, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.9483568075117371, |
|
"grad_norm": 0.382415771484375, |
|
"learning_rate": 0.00010559152442613303, |
|
"loss": 0.726, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.9495305164319249, |
|
"grad_norm": 0.4036392867565155, |
|
"learning_rate": 0.00010547380812242496, |
|
"loss": 0.7264, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.9507042253521126, |
|
"grad_norm": 0.38903331756591797, |
|
"learning_rate": 0.00010535609181871691, |
|
"loss": 0.691, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.9518779342723005, |
|
"grad_norm": 0.3803318440914154, |
|
"learning_rate": 0.00010523837551500883, |
|
"loss": 0.7271, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.9530516431924883, |
|
"grad_norm": 0.3850460350513458, |
|
"learning_rate": 0.00010512065921130076, |
|
"loss": 0.7111, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.954225352112676, |
|
"grad_norm": 0.4110994040966034, |
|
"learning_rate": 0.00010500294290759271, |
|
"loss": 0.7282, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.9553990610328639, |
|
"grad_norm": 0.3853722810745239, |
|
"learning_rate": 0.00010488522660388465, |
|
"loss": 0.7194, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.9565727699530516, |
|
"grad_norm": 0.37440797686576843, |
|
"learning_rate": 0.00010476751030017658, |
|
"loss": 0.7116, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.9577464788732394, |
|
"grad_norm": 0.42637899518013, |
|
"learning_rate": 0.00010464979399646853, |
|
"loss": 0.7189, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.9589201877934272, |
|
"grad_norm": 0.4067356288433075, |
|
"learning_rate": 0.00010453207769276045, |
|
"loss": 0.7509, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.960093896713615, |
|
"grad_norm": 0.3854503929615021, |
|
"learning_rate": 0.00010441436138905238, |
|
"loss": 0.7426, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.9612676056338029, |
|
"grad_norm": 0.4298991858959198, |
|
"learning_rate": 0.00010429664508534433, |
|
"loss": 0.7528, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.9624413145539906, |
|
"grad_norm": 0.3748774826526642, |
|
"learning_rate": 0.00010417892878163627, |
|
"loss": 0.6512, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.9636150234741784, |
|
"grad_norm": 0.38448989391326904, |
|
"learning_rate": 0.00010406121247792819, |
|
"loss": 0.6929, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.9647887323943662, |
|
"grad_norm": 0.42416030168533325, |
|
"learning_rate": 0.00010394349617422015, |
|
"loss": 0.7312, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.965962441314554, |
|
"grad_norm": 0.3875625729560852, |
|
"learning_rate": 0.00010382577987051207, |
|
"loss": 0.7121, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.9671361502347418, |
|
"grad_norm": 0.4241638481616974, |
|
"learning_rate": 0.000103708063566804, |
|
"loss": 0.7248, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.9683098591549296, |
|
"grad_norm": 0.4026165306568146, |
|
"learning_rate": 0.00010359034726309595, |
|
"loss": 0.7224, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.9694835680751174, |
|
"grad_norm": 0.39895206689834595, |
|
"learning_rate": 0.00010347263095938789, |
|
"loss": 0.7193, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.9706572769953051, |
|
"grad_norm": 0.395463228225708, |
|
"learning_rate": 0.00010335491465567981, |
|
"loss": 0.7673, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.971830985915493, |
|
"grad_norm": 0.4351494312286377, |
|
"learning_rate": 0.00010323719835197174, |
|
"loss": 0.7684, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.9730046948356808, |
|
"grad_norm": 0.4378681182861328, |
|
"learning_rate": 0.00010311948204826369, |
|
"loss": 0.7277, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.9741784037558685, |
|
"grad_norm": 0.4214630722999573, |
|
"learning_rate": 0.00010300176574455563, |
|
"loss": 0.7107, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9753521126760564, |
|
"grad_norm": 0.41999107599258423, |
|
"learning_rate": 0.00010288404944084755, |
|
"loss": 0.7328, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.9765258215962441, |
|
"grad_norm": 0.49026909470558167, |
|
"learning_rate": 0.00010276633313713951, |
|
"loss": 0.7345, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.9776995305164319, |
|
"grad_norm": 0.4068211317062378, |
|
"learning_rate": 0.00010264861683343143, |
|
"loss": 0.701, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.9788732394366197, |
|
"grad_norm": 0.42514288425445557, |
|
"learning_rate": 0.00010253090052972336, |
|
"loss": 0.729, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.9800469483568075, |
|
"grad_norm": 0.4883005619049072, |
|
"learning_rate": 0.00010241318422601531, |
|
"loss": 0.7183, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.9812206572769953, |
|
"grad_norm": 0.38146787881851196, |
|
"learning_rate": 0.00010229546792230725, |
|
"loss": 0.6977, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.9823943661971831, |
|
"grad_norm": 0.3898909389972687, |
|
"learning_rate": 0.00010217775161859917, |
|
"loss": 0.7131, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.9835680751173709, |
|
"grad_norm": 0.39693424105644226, |
|
"learning_rate": 0.00010206003531489112, |
|
"loss": 0.7184, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.9847417840375586, |
|
"grad_norm": 0.3968975841999054, |
|
"learning_rate": 0.00010194231901118305, |
|
"loss": 0.7536, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"grad_norm": 0.4030087888240814, |
|
"learning_rate": 0.00010182460270747499, |
|
"loss": 0.7156, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9870892018779343, |
|
"grad_norm": 0.37477344274520874, |
|
"learning_rate": 0.00010170688640376693, |
|
"loss": 0.6815, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.9882629107981221, |
|
"grad_norm": 0.40929409861564636, |
|
"learning_rate": 0.00010158917010005887, |
|
"loss": 0.6827, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.9894366197183099, |
|
"grad_norm": 0.36350882053375244, |
|
"learning_rate": 0.00010147145379635079, |
|
"loss": 0.6927, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.9906103286384976, |
|
"grad_norm": 0.3828059434890747, |
|
"learning_rate": 0.00010135373749264274, |
|
"loss": 0.7254, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.9917840375586855, |
|
"grad_norm": 0.4095743000507355, |
|
"learning_rate": 0.00010123602118893467, |
|
"loss": 0.719, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.9929577464788732, |
|
"grad_norm": 0.37418296933174133, |
|
"learning_rate": 0.0001011183048852266, |
|
"loss": 0.682, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.994131455399061, |
|
"grad_norm": 0.39427751302719116, |
|
"learning_rate": 0.00010100058858151855, |
|
"loss": 0.7742, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.9953051643192489, |
|
"grad_norm": 0.3696395754814148, |
|
"learning_rate": 0.00010088287227781048, |
|
"loss": 0.7377, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.9964788732394366, |
|
"grad_norm": 0.36249879002571106, |
|
"learning_rate": 0.00010076515597410241, |
|
"loss": 0.7237, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.9976525821596244, |
|
"grad_norm": 0.3712272047996521, |
|
"learning_rate": 0.00010064743967039436, |
|
"loss": 0.6737, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9988262910798122, |
|
"grad_norm": 0.37550613284111023, |
|
"learning_rate": 0.00010052972336668629, |
|
"loss": 0.7147, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.405351459980011, |
|
"learning_rate": 0.00010041200706297821, |
|
"loss": 0.7364, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.0011737089201878, |
|
"grad_norm": 0.39747750759124756, |
|
"learning_rate": 0.00010029429075927018, |
|
"loss": 0.6934, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.0023474178403755, |
|
"grad_norm": 0.3695623576641083, |
|
"learning_rate": 0.0001001765744555621, |
|
"loss": 0.6971, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.0035211267605635, |
|
"grad_norm": 0.3880208134651184, |
|
"learning_rate": 0.00010005885815185403, |
|
"loss": 0.7219, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.0046948356807512, |
|
"grad_norm": 0.40131011605262756, |
|
"learning_rate": 9.994114184814597e-05, |
|
"loss": 0.6925, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.005868544600939, |
|
"grad_norm": 0.38630256056785583, |
|
"learning_rate": 9.982342554443791e-05, |
|
"loss": 0.7412, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.0070422535211268, |
|
"grad_norm": 0.39141979813575745, |
|
"learning_rate": 9.970570924072985e-05, |
|
"loss": 0.7089, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.0082159624413145, |
|
"grad_norm": 0.3811167776584625, |
|
"learning_rate": 9.958799293702178e-05, |
|
"loss": 0.6979, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.0093896713615023, |
|
"grad_norm": 0.38177528977394104, |
|
"learning_rate": 9.947027663331372e-05, |
|
"loss": 0.7181, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0105633802816902, |
|
"grad_norm": 0.36225804686546326, |
|
"learning_rate": 9.935256032960567e-05, |
|
"loss": 0.6495, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.011737089201878, |
|
"grad_norm": 0.3796376585960388, |
|
"learning_rate": 9.923484402589759e-05, |
|
"loss": 0.6661, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.0129107981220657, |
|
"grad_norm": 0.3896029591560364, |
|
"learning_rate": 9.911712772218953e-05, |
|
"loss": 0.6705, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.0140845070422535, |
|
"grad_norm": 0.35688912868499756, |
|
"learning_rate": 9.899941141848147e-05, |
|
"loss": 0.6835, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.0152582159624413, |
|
"grad_norm": 0.3919657766819, |
|
"learning_rate": 9.88816951147734e-05, |
|
"loss": 0.6771, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.016431924882629, |
|
"grad_norm": 0.390311062335968, |
|
"learning_rate": 9.876397881106534e-05, |
|
"loss": 0.7208, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.017605633802817, |
|
"grad_norm": 0.3857402205467224, |
|
"learning_rate": 9.864626250735727e-05, |
|
"loss": 0.7321, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.0187793427230047, |
|
"grad_norm": 0.3688738942146301, |
|
"learning_rate": 9.852854620364921e-05, |
|
"loss": 0.6853, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.0199530516431925, |
|
"grad_norm": 0.3814820647239685, |
|
"learning_rate": 9.841082989994114e-05, |
|
"loss": 0.664, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.0211267605633803, |
|
"grad_norm": 0.3849344253540039, |
|
"learning_rate": 9.829311359623309e-05, |
|
"loss": 0.6844, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.022300469483568, |
|
"grad_norm": 0.36203038692474365, |
|
"learning_rate": 9.817539729252502e-05, |
|
"loss": 0.7201, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.0234741784037558, |
|
"grad_norm": 0.36614471673965454, |
|
"learning_rate": 9.805768098881696e-05, |
|
"loss": 0.659, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.0246478873239437, |
|
"grad_norm": 0.3908173143863678, |
|
"learning_rate": 9.79399646851089e-05, |
|
"loss": 0.6638, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.0258215962441315, |
|
"grad_norm": 0.35966452956199646, |
|
"learning_rate": 9.782224838140083e-05, |
|
"loss": 0.7187, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.0269953051643192, |
|
"grad_norm": 0.40878093242645264, |
|
"learning_rate": 9.770453207769276e-05, |
|
"loss": 0.691, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.028169014084507, |
|
"grad_norm": 0.38903382420539856, |
|
"learning_rate": 9.75868157739847e-05, |
|
"loss": 0.718, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.0293427230046948, |
|
"grad_norm": 0.3865324556827545, |
|
"learning_rate": 9.746909947027663e-05, |
|
"loss": 0.7331, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.0305164319248827, |
|
"grad_norm": 0.37417513132095337, |
|
"learning_rate": 9.735138316656858e-05, |
|
"loss": 0.677, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.0316901408450705, |
|
"grad_norm": 0.38043439388275146, |
|
"learning_rate": 9.72336668628605e-05, |
|
"loss": 0.6932, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.0328638497652582, |
|
"grad_norm": 0.37418729066848755, |
|
"learning_rate": 9.711595055915245e-05, |
|
"loss": 0.7119, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.034037558685446, |
|
"grad_norm": 0.4013047218322754, |
|
"learning_rate": 9.699823425544438e-05, |
|
"loss": 0.7041, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.0352112676056338, |
|
"grad_norm": 0.38462570309638977, |
|
"learning_rate": 9.688051795173632e-05, |
|
"loss": 0.6861, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.0363849765258215, |
|
"grad_norm": 0.3900148868560791, |
|
"learning_rate": 9.676280164802825e-05, |
|
"loss": 0.6382, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.0375586854460095, |
|
"grad_norm": 0.3882652819156647, |
|
"learning_rate": 9.66450853443202e-05, |
|
"loss": 0.6948, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.0387323943661972, |
|
"grad_norm": 0.36546608805656433, |
|
"learning_rate": 9.652736904061212e-05, |
|
"loss": 0.7064, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.039906103286385, |
|
"grad_norm": 0.3788559138774872, |
|
"learning_rate": 9.640965273690407e-05, |
|
"loss": 0.7129, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.0410798122065728, |
|
"grad_norm": 0.3979467451572418, |
|
"learning_rate": 9.6291936433196e-05, |
|
"loss": 0.7196, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.0422535211267605, |
|
"grad_norm": 0.3777488172054291, |
|
"learning_rate": 9.617422012948794e-05, |
|
"loss": 0.6922, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.0434272300469483, |
|
"grad_norm": 0.39730504155158997, |
|
"learning_rate": 9.605650382577987e-05, |
|
"loss": 0.6529, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.0446009389671362, |
|
"grad_norm": 0.39619576930999756, |
|
"learning_rate": 9.593878752207182e-05, |
|
"loss": 0.6505, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.045774647887324, |
|
"grad_norm": 0.3763888478279114, |
|
"learning_rate": 9.582107121836374e-05, |
|
"loss": 0.638, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.0469483568075117, |
|
"grad_norm": 0.3947450518608093, |
|
"learning_rate": 9.570335491465569e-05, |
|
"loss": 0.7099, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.0481220657276995, |
|
"grad_norm": 0.43239885568618774, |
|
"learning_rate": 9.558563861094763e-05, |
|
"loss": 0.7112, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.0492957746478873, |
|
"grad_norm": 0.37725165486335754, |
|
"learning_rate": 9.546792230723956e-05, |
|
"loss": 0.6775, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.050469483568075, |
|
"grad_norm": 0.3807140290737152, |
|
"learning_rate": 9.53502060035315e-05, |
|
"loss": 0.7201, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.051643192488263, |
|
"grad_norm": 0.40270236134529114, |
|
"learning_rate": 9.523248969982343e-05, |
|
"loss": 0.6908, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.0528169014084507, |
|
"grad_norm": 0.38907137513160706, |
|
"learning_rate": 9.511477339611536e-05, |
|
"loss": 0.7274, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.0539906103286385, |
|
"grad_norm": 0.35074397921562195, |
|
"learning_rate": 9.49970570924073e-05, |
|
"loss": 0.6765, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.0551643192488263, |
|
"grad_norm": 0.37548649311065674, |
|
"learning_rate": 9.487934078869925e-05, |
|
"loss": 0.7258, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"grad_norm": 0.3947518467903137, |
|
"learning_rate": 9.476162448499118e-05, |
|
"loss": 0.7142, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.057511737089202, |
|
"grad_norm": 0.36888387799263, |
|
"learning_rate": 9.464390818128312e-05, |
|
"loss": 0.664, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.0586854460093897, |
|
"grad_norm": 0.3735831379890442, |
|
"learning_rate": 9.452619187757505e-05, |
|
"loss": 0.6914, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.0598591549295775, |
|
"grad_norm": 0.3840358257293701, |
|
"learning_rate": 9.440847557386698e-05, |
|
"loss": 0.663, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.0610328638497653, |
|
"grad_norm": 0.408840537071228, |
|
"learning_rate": 9.429075927015892e-05, |
|
"loss": 0.7225, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.062206572769953, |
|
"grad_norm": 0.36408165097236633, |
|
"learning_rate": 9.417304296645085e-05, |
|
"loss": 0.6744, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.0633802816901408, |
|
"grad_norm": 0.4005196690559387, |
|
"learning_rate": 9.405532666274279e-05, |
|
"loss": 0.7285, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.0645539906103287, |
|
"grad_norm": 0.3824830949306488, |
|
"learning_rate": 9.393761035903474e-05, |
|
"loss": 0.6978, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.0657276995305165, |
|
"grad_norm": 0.38410818576812744, |
|
"learning_rate": 9.381989405532666e-05, |
|
"loss": 0.6725, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.0669014084507042, |
|
"grad_norm": 0.37026217579841614, |
|
"learning_rate": 9.37021777516186e-05, |
|
"loss": 0.6908, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.068075117370892, |
|
"grad_norm": 0.37652963399887085, |
|
"learning_rate": 9.358446144791054e-05, |
|
"loss": 0.6674, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.0692488262910798, |
|
"grad_norm": 0.40584585070610046, |
|
"learning_rate": 9.346674514420247e-05, |
|
"loss": 0.7087, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.0704225352112675, |
|
"grad_norm": 0.3777616620063782, |
|
"learning_rate": 9.334902884049441e-05, |
|
"loss": 0.6633, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.0715962441314555, |
|
"grad_norm": 0.35584181547164917, |
|
"learning_rate": 9.323131253678636e-05, |
|
"loss": 0.6484, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.0727699530516432, |
|
"grad_norm": 0.40920573472976685, |
|
"learning_rate": 9.311359623307828e-05, |
|
"loss": 0.6781, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.073943661971831, |
|
"grad_norm": 0.37617766857147217, |
|
"learning_rate": 9.299587992937023e-05, |
|
"loss": 0.6785, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.0751173708920188, |
|
"grad_norm": 0.36854755878448486, |
|
"learning_rate": 9.287816362566216e-05, |
|
"loss": 0.6805, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.0762910798122065, |
|
"grad_norm": 0.3820021152496338, |
|
"learning_rate": 9.27604473219541e-05, |
|
"loss": 0.7413, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.0774647887323943, |
|
"grad_norm": 0.3654205799102783, |
|
"learning_rate": 9.264273101824603e-05, |
|
"loss": 0.6996, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.0786384976525822, |
|
"grad_norm": 0.36847448348999023, |
|
"learning_rate": 9.252501471453798e-05, |
|
"loss": 0.6593, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.07981220657277, |
|
"grad_norm": 0.4072454571723938, |
|
"learning_rate": 9.24072984108299e-05, |
|
"loss": 0.7062, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.0809859154929577, |
|
"grad_norm": 0.37201663851737976, |
|
"learning_rate": 9.228958210712185e-05, |
|
"loss": 0.7188, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.0821596244131455, |
|
"grad_norm": 0.40708494186401367, |
|
"learning_rate": 9.217186580341378e-05, |
|
"loss": 0.6984, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 0.37668758630752563, |
|
"learning_rate": 9.205414949970572e-05, |
|
"loss": 0.6856, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.084507042253521, |
|
"grad_norm": 0.41518712043762207, |
|
"learning_rate": 9.193643319599765e-05, |
|
"loss": 0.7093, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.085680751173709, |
|
"grad_norm": 0.3661474883556366, |
|
"learning_rate": 9.181871689228958e-05, |
|
"loss": 0.6765, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.0868544600938967, |
|
"grad_norm": 0.3910673260688782, |
|
"learning_rate": 9.170100058858152e-05, |
|
"loss": 0.6778, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.0880281690140845, |
|
"grad_norm": 0.3851100206375122, |
|
"learning_rate": 9.158328428487345e-05, |
|
"loss": 0.7188, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.0892018779342723, |
|
"grad_norm": 0.36254799365997314, |
|
"learning_rate": 9.14655679811654e-05, |
|
"loss": 0.7182, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.09037558685446, |
|
"grad_norm": 0.39364567399024963, |
|
"learning_rate": 9.134785167745734e-05, |
|
"loss": 0.7208, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.091549295774648, |
|
"grad_norm": 0.3755466639995575, |
|
"learning_rate": 9.123013537374927e-05, |
|
"loss": 0.6771, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0927230046948357, |
|
"grad_norm": 0.361087828874588, |
|
"learning_rate": 9.11124190700412e-05, |
|
"loss": 0.6541, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.0938967136150235, |
|
"grad_norm": 0.37327754497528076, |
|
"learning_rate": 9.099470276633314e-05, |
|
"loss": 0.698, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.0950704225352113, |
|
"grad_norm": 0.38413748145103455, |
|
"learning_rate": 9.087698646262507e-05, |
|
"loss": 0.6933, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.096244131455399, |
|
"grad_norm": 0.4182147681713104, |
|
"learning_rate": 9.075927015891701e-05, |
|
"loss": 0.6776, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.0974178403755868, |
|
"grad_norm": 0.3987724483013153, |
|
"learning_rate": 9.064155385520894e-05, |
|
"loss": 0.694, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.0985915492957747, |
|
"grad_norm": 0.37629225850105286, |
|
"learning_rate": 9.052383755150089e-05, |
|
"loss": 0.6565, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.0997652582159625, |
|
"grad_norm": 0.38973352313041687, |
|
"learning_rate": 9.040612124779281e-05, |
|
"loss": 0.6739, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.1009389671361502, |
|
"grad_norm": 0.3845914900302887, |
|
"learning_rate": 9.028840494408476e-05, |
|
"loss": 0.6788, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.102112676056338, |
|
"grad_norm": 0.3861023485660553, |
|
"learning_rate": 9.01706886403767e-05, |
|
"loss": 0.6763, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.1032863849765258, |
|
"grad_norm": 0.37565183639526367, |
|
"learning_rate": 9.005297233666863e-05, |
|
"loss": 0.6478, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.1044600938967135, |
|
"grad_norm": 0.4068315029144287, |
|
"learning_rate": 8.993525603296056e-05, |
|
"loss": 0.6752, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.1056338028169015, |
|
"grad_norm": 0.37796974182128906, |
|
"learning_rate": 8.981753972925251e-05, |
|
"loss": 0.7355, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.1068075117370892, |
|
"grad_norm": 0.4024117887020111, |
|
"learning_rate": 8.969982342554443e-05, |
|
"loss": 0.6648, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.107981220657277, |
|
"grad_norm": 0.404442697763443, |
|
"learning_rate": 8.958210712183638e-05, |
|
"loss": 0.7, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.1091549295774648, |
|
"grad_norm": 0.35948899388313293, |
|
"learning_rate": 8.946439081812832e-05, |
|
"loss": 0.6859, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.1103286384976525, |
|
"grad_norm": 0.4014012813568115, |
|
"learning_rate": 8.934667451442025e-05, |
|
"loss": 0.7294, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.1115023474178405, |
|
"grad_norm": 0.38261109590530396, |
|
"learning_rate": 8.922895821071219e-05, |
|
"loss": 0.6965, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.1126760563380282, |
|
"grad_norm": 0.39297208189964294, |
|
"learning_rate": 8.911124190700413e-05, |
|
"loss": 0.7153, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.113849765258216, |
|
"grad_norm": 0.3710176348686218, |
|
"learning_rate": 8.899352560329605e-05, |
|
"loss": 0.7085, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.1150234741784038, |
|
"grad_norm": 0.3750080168247223, |
|
"learning_rate": 8.8875809299588e-05, |
|
"loss": 0.6739, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.1161971830985915, |
|
"grad_norm": 0.3672105073928833, |
|
"learning_rate": 8.875809299587994e-05, |
|
"loss": 0.7097, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.1173708920187793, |
|
"grad_norm": 0.3663265109062195, |
|
"learning_rate": 8.864037669217187e-05, |
|
"loss": 0.6594, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.1185446009389672, |
|
"grad_norm": 0.4023442268371582, |
|
"learning_rate": 8.85226603884638e-05, |
|
"loss": 0.7186, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.119718309859155, |
|
"grad_norm": 0.36602139472961426, |
|
"learning_rate": 8.840494408475574e-05, |
|
"loss": 0.67, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.1208920187793427, |
|
"grad_norm": 0.36866381764411926, |
|
"learning_rate": 8.828722778104768e-05, |
|
"loss": 0.6954, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.1220657276995305, |
|
"grad_norm": 0.38905832171440125, |
|
"learning_rate": 8.816951147733961e-05, |
|
"loss": 0.7214, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.1232394366197183, |
|
"grad_norm": 0.3806670010089874, |
|
"learning_rate": 8.805179517363156e-05, |
|
"loss": 0.6679, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.124413145539906, |
|
"grad_norm": 0.3796343505382538, |
|
"learning_rate": 8.793407886992349e-05, |
|
"loss": 0.6334, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.125586854460094, |
|
"grad_norm": 0.4143288731575012, |
|
"learning_rate": 8.781636256621543e-05, |
|
"loss": 0.7484, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 0.3692832887172699, |
|
"learning_rate": 8.769864626250736e-05, |
|
"loss": 0.6581, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.1279342723004695, |
|
"grad_norm": 0.39971667528152466, |
|
"learning_rate": 8.75809299587993e-05, |
|
"loss": 0.7252, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.1291079812206573, |
|
"grad_norm": 0.391924113035202, |
|
"learning_rate": 8.746321365509123e-05, |
|
"loss": 0.673, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.130281690140845, |
|
"grad_norm": 0.39626866579055786, |
|
"learning_rate": 8.734549735138317e-05, |
|
"loss": 0.7161, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.131455399061033, |
|
"grad_norm": 0.3812800347805023, |
|
"learning_rate": 8.72277810476751e-05, |
|
"loss": 0.6735, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.1326291079812207, |
|
"grad_norm": 0.36054447293281555, |
|
"learning_rate": 8.711006474396705e-05, |
|
"loss": 0.6861, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.1338028169014085, |
|
"grad_norm": 0.41179588437080383, |
|
"learning_rate": 8.699234844025897e-05, |
|
"loss": 0.7151, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.1349765258215962, |
|
"grad_norm": 0.3688051998615265, |
|
"learning_rate": 8.687463213655092e-05, |
|
"loss": 0.6608, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.136150234741784, |
|
"grad_norm": 0.3877013325691223, |
|
"learning_rate": 8.675691583284285e-05, |
|
"loss": 0.6826, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.1373239436619718, |
|
"grad_norm": 0.38986387848854065, |
|
"learning_rate": 8.663919952913479e-05, |
|
"loss": 0.6915, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.1384976525821595, |
|
"grad_norm": 0.41986656188964844, |
|
"learning_rate": 8.652148322542672e-05, |
|
"loss": 0.7471, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.1396713615023475, |
|
"grad_norm": 0.3977747857570648, |
|
"learning_rate": 8.640376692171867e-05, |
|
"loss": 0.6844, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.1408450704225352, |
|
"grad_norm": 0.3956218361854553, |
|
"learning_rate": 8.628605061801059e-05, |
|
"loss": 0.6586, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.142018779342723, |
|
"grad_norm": 0.3789028227329254, |
|
"learning_rate": 8.616833431430254e-05, |
|
"loss": 0.7415, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.1431924882629108, |
|
"grad_norm": 0.3878764808177948, |
|
"learning_rate": 8.605061801059447e-05, |
|
"loss": 0.6559, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.1443661971830985, |
|
"grad_norm": 0.37901559472084045, |
|
"learning_rate": 8.593290170688641e-05, |
|
"loss": 0.6685, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.1455399061032865, |
|
"grad_norm": 0.40399041771888733, |
|
"learning_rate": 8.581518540317834e-05, |
|
"loss": 0.6602, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.1467136150234742, |
|
"grad_norm": 0.38144391775131226, |
|
"learning_rate": 8.569746909947029e-05, |
|
"loss": 0.6683, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.147887323943662, |
|
"grad_norm": 0.3610433042049408, |
|
"learning_rate": 8.557975279576221e-05, |
|
"loss": 0.6579, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.1490610328638498, |
|
"grad_norm": 0.42147722840309143, |
|
"learning_rate": 8.546203649205416e-05, |
|
"loss": 0.6997, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.1502347417840375, |
|
"grad_norm": 0.3799455761909485, |
|
"learning_rate": 8.53443201883461e-05, |
|
"loss": 0.7096, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.1514084507042253, |
|
"grad_norm": 0.4173739552497864, |
|
"learning_rate": 8.522660388463803e-05, |
|
"loss": 0.6708, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.1525821596244132, |
|
"grad_norm": 0.3997640013694763, |
|
"learning_rate": 8.510888758092996e-05, |
|
"loss": 0.6514, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.153755868544601, |
|
"grad_norm": 0.3758656978607178, |
|
"learning_rate": 8.49911712772219e-05, |
|
"loss": 0.6442, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.1549295774647887, |
|
"grad_norm": 0.37429675459861755, |
|
"learning_rate": 8.487345497351383e-05, |
|
"loss": 0.6619, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.1561032863849765, |
|
"grad_norm": 0.3747265934944153, |
|
"learning_rate": 8.475573866980577e-05, |
|
"loss": 0.7107, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.1572769953051643, |
|
"grad_norm": 0.37782514095306396, |
|
"learning_rate": 8.463802236609771e-05, |
|
"loss": 0.7241, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.158450704225352, |
|
"grad_norm": 0.3703122138977051, |
|
"learning_rate": 8.452030606238965e-05, |
|
"loss": 0.6952, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.15962441314554, |
|
"grad_norm": 0.37990477681159973, |
|
"learning_rate": 8.440258975868158e-05, |
|
"loss": 0.7364, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.1607981220657277, |
|
"grad_norm": 0.42046844959259033, |
|
"learning_rate": 8.428487345497352e-05, |
|
"loss": 0.695, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.1619718309859155, |
|
"grad_norm": 0.3745966851711273, |
|
"learning_rate": 8.416715715126545e-05, |
|
"loss": 0.6875, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.1631455399061033, |
|
"grad_norm": 0.3496320843696594, |
|
"learning_rate": 8.404944084755739e-05, |
|
"loss": 0.6826, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.164319248826291, |
|
"grad_norm": 0.39181873202323914, |
|
"learning_rate": 8.393172454384934e-05, |
|
"loss": 0.6937, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.165492957746479, |
|
"grad_norm": 0.3910543620586395, |
|
"learning_rate": 8.381400824014126e-05, |
|
"loss": 0.749, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.3770748972892761, |
|
"learning_rate": 8.36962919364332e-05, |
|
"loss": 0.6743, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.1678403755868545, |
|
"grad_norm": 0.3675018846988678, |
|
"learning_rate": 8.357857563272513e-05, |
|
"loss": 0.6499, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.1690140845070423, |
|
"grad_norm": 0.36867639422416687, |
|
"learning_rate": 8.346085932901707e-05, |
|
"loss": 0.6642, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.17018779342723, |
|
"grad_norm": 0.3860320746898651, |
|
"learning_rate": 8.334314302530901e-05, |
|
"loss": 0.6947, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.1713615023474178, |
|
"grad_norm": 0.36680731177330017, |
|
"learning_rate": 8.322542672160094e-05, |
|
"loss": 0.7111, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.1725352112676055, |
|
"grad_norm": 0.38997524976730347, |
|
"learning_rate": 8.310771041789288e-05, |
|
"loss": 0.6842, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.1737089201877935, |
|
"grad_norm": 0.3883102834224701, |
|
"learning_rate": 8.298999411418483e-05, |
|
"loss": 0.6655, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1704, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 1.4844237184940114e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|