|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.9241877256317688,
  "eval_steps": 25,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019253910950661854,
      "grad_norm": 23.54662322998047,
      "learning_rate": 0.00019901960784313727,
      "loss": 9.4209,
      "step": 1
    },
    {
      "epoch": 0.03850782190132371,
      "grad_norm": 22.151025772094727,
      "learning_rate": 0.00019803921568627454,
      "loss": 9.3584,
      "step": 2
    },
    {
      "epoch": 0.05776173285198556,
      "grad_norm": 32.229759216308594,
      "learning_rate": 0.00019705882352941177,
      "loss": 9.1469,
      "step": 3
    },
    {
      "epoch": 0.07701564380264742,
      "grad_norm": 42.96324920654297,
      "learning_rate": 0.000196078431372549,
      "loss": 8.5595,
      "step": 4
    },
    {
      "epoch": 0.09626955475330927,
      "grad_norm": 32.40974044799805,
      "learning_rate": 0.00019509803921568628,
      "loss": 8.3043,
      "step": 5
    },
    {
      "epoch": 0.11552346570397112,
      "grad_norm": 32.838134765625,
      "learning_rate": 0.00019411764705882354,
      "loss": 8.1422,
      "step": 6
    },
    {
      "epoch": 0.13477737665463296,
      "grad_norm": 34.38292694091797,
      "learning_rate": 0.0001931372549019608,
      "loss": 7.7643,
      "step": 7
    },
    {
      "epoch": 0.15403128760529483,
      "grad_norm": 31.947425842285156,
      "learning_rate": 0.00019215686274509807,
      "loss": 7.4565,
      "step": 8
    },
    {
      "epoch": 0.17328519855595667,
      "grad_norm": 242.39166259765625,
      "learning_rate": 0.0001911764705882353,
      "loss": 7.436,
      "step": 9
    },
    {
      "epoch": 0.19253910950661854,
      "grad_norm": 25.68425750732422,
      "learning_rate": 0.00019019607843137254,
      "loss": 7.1307,
      "step": 10
    },
    {
      "epoch": 0.21179302045728038,
      "grad_norm": 24.717641830444336,
      "learning_rate": 0.0001892156862745098,
      "loss": 7.1206,
      "step": 11
    },
    {
      "epoch": 0.23104693140794225,
      "grad_norm": 36.47980880737305,
      "learning_rate": 0.00018823529411764707,
      "loss": 6.6912,
      "step": 12
    },
    {
      "epoch": 0.2503008423586041,
      "grad_norm": 28.181612014770508,
      "learning_rate": 0.00018725490196078433,
      "loss": 6.6547,
      "step": 13
    },
    {
      "epoch": 0.2695547533092659,
      "grad_norm": 24.55516242980957,
      "learning_rate": 0.00018627450980392157,
      "loss": 6.9486,
      "step": 14
    },
    {
      "epoch": 0.2888086642599278,
      "grad_norm": 32.426963806152344,
      "learning_rate": 0.00018529411764705883,
      "loss": 7.1069,
      "step": 15
    },
    {
      "epoch": 0.30806257521058966,
      "grad_norm": 20.413976669311523,
      "learning_rate": 0.00018431372549019607,
      "loss": 6.6628,
      "step": 16
    },
    {
      "epoch": 0.32731648616125153,
      "grad_norm": 28.58907699584961,
      "learning_rate": 0.00018333333333333334,
      "loss": 6.5333,
      "step": 17
    },
    {
      "epoch": 0.34657039711191334,
      "grad_norm": 24.02996253967285,
      "learning_rate": 0.0001823529411764706,
      "loss": 6.5981,
      "step": 18
    },
    {
      "epoch": 0.3658243080625752,
      "grad_norm": 23.250669479370117,
      "learning_rate": 0.00018137254901960786,
      "loss": 6.4779,
      "step": 19
    },
    {
      "epoch": 0.3850782190132371,
      "grad_norm": 15.006091117858887,
      "learning_rate": 0.0001803921568627451,
      "loss": 6.6096,
      "step": 20
    },
    {
      "epoch": 0.4043321299638989,
      "grad_norm": 16.560985565185547,
      "learning_rate": 0.00017941176470588236,
      "loss": 6.6496,
      "step": 21
    },
    {
      "epoch": 0.42358604091456076,
      "grad_norm": 31.329875946044922,
      "learning_rate": 0.00017843137254901963,
      "loss": 6.9627,
      "step": 22
    },
    {
      "epoch": 0.4428399518652226,
      "grad_norm": 12.381958961486816,
      "learning_rate": 0.00017745098039215687,
      "loss": 6.398,
      "step": 23
    },
    {
      "epoch": 0.4620938628158845,
      "grad_norm": 9.271923065185547,
      "learning_rate": 0.00017647058823529413,
      "loss": 6.6,
      "step": 24
    },
    {
      "epoch": 0.4813477737665463,
      "grad_norm": 12.544185638427734,
      "learning_rate": 0.00017549019607843137,
      "loss": 6.4684,
      "step": 25
    },
    {
      "epoch": 0.4813477737665463,
      "eval_clap": 0.09883298724889755,
      "eval_loss": 6.00625467300415,
      "eval_runtime": 166.3531,
      "eval_samples_per_second": 0.096,
      "eval_steps_per_second": 0.096,
      "step": 25
    },
    {
      "epoch": 0.5006016847172082,
      "grad_norm": 11.769013404846191,
      "learning_rate": 0.00017450980392156863,
      "loss": 6.5248,
      "step": 26
    },
    {
      "epoch": 0.51985559566787,
      "grad_norm": 11.039627075195312,
      "learning_rate": 0.0001735294117647059,
      "loss": 6.6403,
      "step": 27
    },
    {
      "epoch": 0.5391095066185319,
      "grad_norm": 17.4042911529541,
      "learning_rate": 0.00017254901960784316,
      "loss": 6.8092,
      "step": 28
    },
    {
      "epoch": 0.5583634175691937,
      "grad_norm": 12.926351547241211,
      "learning_rate": 0.0001715686274509804,
      "loss": 6.5886,
      "step": 29
    },
    {
      "epoch": 0.5776173285198556,
      "grad_norm": 12.865156173706055,
      "learning_rate": 0.00017058823529411766,
      "loss": 6.6176,
      "step": 30
    },
    {
      "epoch": 0.5968712394705175,
      "grad_norm": 15.517515182495117,
      "learning_rate": 0.0001696078431372549,
      "loss": 6.4096,
      "step": 31
    },
    {
      "epoch": 0.6161251504211793,
      "grad_norm": 12.356785774230957,
      "learning_rate": 0.00016862745098039216,
      "loss": 6.4528,
      "step": 32
    },
    {
      "epoch": 0.6353790613718412,
      "grad_norm": 15.226251602172852,
      "learning_rate": 0.00016764705882352942,
      "loss": 6.3188,
      "step": 33
    },
    {
      "epoch": 0.6546329723225031,
      "grad_norm": 13.221582412719727,
      "learning_rate": 0.0001666666666666667,
      "loss": 6.542,
      "step": 34
    },
    {
      "epoch": 0.6738868832731648,
      "grad_norm": 13.414304733276367,
      "learning_rate": 0.00016568627450980395,
      "loss": 6.4272,
      "step": 35
    },
    {
      "epoch": 0.6931407942238267,
      "grad_norm": 27.81321907043457,
      "learning_rate": 0.0001647058823529412,
      "loss": 6.7035,
      "step": 36
    },
    {
      "epoch": 0.7123947051744886,
      "grad_norm": 17.882911682128906,
      "learning_rate": 0.00016372549019607843,
      "loss": 6.6117,
      "step": 37
    },
    {
      "epoch": 0.7316486161251504,
      "grad_norm": 10.675613403320312,
      "learning_rate": 0.0001627450980392157,
      "loss": 6.4818,
      "step": 38
    },
    {
      "epoch": 0.7509025270758123,
      "grad_norm": 11.32511043548584,
      "learning_rate": 0.00016176470588235295,
      "loss": 6.4717,
      "step": 39
    },
    {
      "epoch": 0.7701564380264742,
      "grad_norm": 13.292048454284668,
      "learning_rate": 0.00016078431372549022,
      "loss": 6.4119,
      "step": 40
    },
    {
      "epoch": 0.789410348977136,
      "grad_norm": 9.824177742004395,
      "learning_rate": 0.00015980392156862746,
      "loss": 6.6399,
      "step": 41
    },
    {
      "epoch": 0.8086642599277978,
      "grad_norm": 18.48476791381836,
      "learning_rate": 0.0001588235294117647,
      "loss": 6.4116,
      "step": 42
    },
    {
      "epoch": 0.8279181708784596,
      "grad_norm": 10.409250259399414,
      "learning_rate": 0.00015784313725490196,
      "loss": 6.4832,
      "step": 43
    },
    {
      "epoch": 0.8471720818291215,
      "grad_norm": 18.297466278076172,
      "learning_rate": 0.00015686274509803922,
      "loss": 6.308,
      "step": 44
    },
    {
      "epoch": 0.8664259927797834,
      "grad_norm": 12.408952713012695,
      "learning_rate": 0.00015588235294117648,
      "loss": 6.3373,
      "step": 45
    },
    {
      "epoch": 0.8856799037304453,
      "grad_norm": 12.280571937561035,
      "learning_rate": 0.00015490196078431375,
      "loss": 6.3173,
      "step": 46
    },
    {
      "epoch": 0.9049338146811071,
      "grad_norm": 12.348167419433594,
      "learning_rate": 0.00015392156862745098,
      "loss": 6.2873,
      "step": 47
    },
    {
      "epoch": 0.924187725631769,
      "grad_norm": 28.005126953125,
      "learning_rate": 0.00015294117647058822,
      "loss": 6.7117,
      "step": 48
    },
    {
      "epoch": 0.9434416365824309,
      "grad_norm": 16.248571395874023,
      "learning_rate": 0.00015196078431372549,
      "loss": 6.3493,
      "step": 49
    },
    {
      "epoch": 0.9626955475330926,
      "grad_norm": 19.102869033813477,
      "learning_rate": 0.00015098039215686275,
      "loss": 6.4209,
      "step": 50
    },
    {
      "epoch": 0.9626955475330926,
      "eval_clap": 0.13957397639751434,
      "eval_loss": 6.070012092590332,
      "eval_runtime": 165.6113,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 50
    },
    {
      "epoch": 0.9819494584837545,
      "grad_norm": 6.675487995147705,
      "learning_rate": 0.00015000000000000001,
      "loss": 6.1695,
      "step": 51
    },
    {
      "epoch": 1.0,
      "grad_norm": 14.88092041015625,
      "learning_rate": 0.00014901960784313728,
      "loss": 5.6169,
      "step": 52
    },
    {
      "epoch": 1.0192539109506618,
      "grad_norm": 19.78269386291504,
      "learning_rate": 0.00014803921568627451,
      "loss": 6.5455,
      "step": 53
    },
    {
      "epoch": 1.0385078219013237,
      "grad_norm": 7.873740196228027,
      "learning_rate": 0.00014705882352941178,
      "loss": 6.3154,
      "step": 54
    },
    {
      "epoch": 1.0577617328519855,
      "grad_norm": 10.514632225036621,
      "learning_rate": 0.00014607843137254902,
      "loss": 6.5085,
      "step": 55
    },
    {
      "epoch": 1.0770156438026475,
      "grad_norm": 10.021757125854492,
      "learning_rate": 0.00014509803921568628,
      "loss": 6.5109,
      "step": 56
    },
    {
      "epoch": 1.0962695547533092,
      "grad_norm": 8.690667152404785,
      "learning_rate": 0.00014411764705882354,
      "loss": 6.5515,
      "step": 57
    },
    {
      "epoch": 1.1155234657039712,
      "grad_norm": 12.78662109375,
      "learning_rate": 0.00014313725490196078,
      "loss": 6.5425,
      "step": 58
    },
    {
      "epoch": 1.134777376654633,
      "grad_norm": 10.592965126037598,
      "learning_rate": 0.00014215686274509804,
      "loss": 6.5105,
      "step": 59
    },
    {
      "epoch": 1.154031287605295,
      "grad_norm": 7.947122573852539,
      "learning_rate": 0.0001411764705882353,
      "loss": 6.6142,
      "step": 60
    },
    {
      "epoch": 1.1732851985559567,
      "grad_norm": 6.823319911956787,
      "learning_rate": 0.00014019607843137255,
      "loss": 6.5339,
      "step": 61
    },
    {
      "epoch": 1.1925391095066185,
      "grad_norm": 16.670989990234375,
      "learning_rate": 0.0001392156862745098,
      "loss": 6.3022,
      "step": 62
    },
    {
      "epoch": 1.2117930204572804,
      "grad_norm": 20.09317398071289,
      "learning_rate": 0.00013823529411764707,
      "loss": 6.0779,
      "step": 63
    },
    {
      "epoch": 1.2310469314079422,
      "grad_norm": 8.030014991760254,
      "learning_rate": 0.0001372549019607843,
      "loss": 6.3284,
      "step": 64
    },
    {
      "epoch": 1.2503008423586042,
      "grad_norm": 10.324827194213867,
      "learning_rate": 0.00013627450980392157,
      "loss": 6.4022,
      "step": 65
    },
    {
      "epoch": 1.269554753309266,
      "grad_norm": 29.070960998535156,
      "learning_rate": 0.00013529411764705884,
      "loss": 6.7835,
      "step": 66
    },
    {
      "epoch": 1.288808664259928,
      "grad_norm": 17.838394165039062,
      "learning_rate": 0.00013431372549019608,
      "loss": 6.5344,
      "step": 67
    },
    {
      "epoch": 1.3080625752105897,
      "grad_norm": 10.388354301452637,
      "learning_rate": 0.00013333333333333334,
      "loss": 6.3438,
      "step": 68
    },
    {
      "epoch": 1.3273164861612514,
      "grad_norm": 9.607653617858887,
      "learning_rate": 0.0001323529411764706,
      "loss": 6.4325,
      "step": 69
    },
    {
      "epoch": 1.3465703971119134,
      "grad_norm": 9.639688491821289,
      "learning_rate": 0.00013137254901960784,
      "loss": 6.3907,
      "step": 70
    },
    {
      "epoch": 1.3658243080625752,
      "grad_norm": 9.424043655395508,
      "learning_rate": 0.0001303921568627451,
      "loss": 6.605,
      "step": 71
    },
    {
      "epoch": 1.3850782190132371,
      "grad_norm": 8.21303653717041,
      "learning_rate": 0.00012941176470588237,
      "loss": 6.6275,
      "step": 72
    },
    {
      "epoch": 1.404332129963899,
      "grad_norm": 10.479741096496582,
      "learning_rate": 0.00012843137254901963,
      "loss": 6.4801,
      "step": 73
    },
    {
      "epoch": 1.4235860409145609,
      "grad_norm": 21.424253463745117,
      "learning_rate": 0.00012745098039215687,
      "loss": 6.3391,
      "step": 74
    },
    {
      "epoch": 1.4428399518652226,
      "grad_norm": 6.5513224601745605,
      "learning_rate": 0.0001264705882352941,
      "loss": 6.7252,
      "step": 75
    },
    {
      "epoch": 1.4428399518652226,
      "eval_clap": 0.10309316217899323,
      "eval_loss": 6.036521911621094,
      "eval_runtime": 165.4554,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 75
    },
    {
      "epoch": 1.4620938628158844,
      "grad_norm": 32.52528762817383,
      "learning_rate": 0.00012549019607843137,
      "loss": 6.1922,
      "step": 76
    },
    {
      "epoch": 1.4813477737665464,
      "grad_norm": 23.51795196533203,
      "learning_rate": 0.00012450980392156863,
      "loss": 6.3506,
      "step": 77
    },
    {
      "epoch": 1.5006016847172083,
      "grad_norm": 10.925686836242676,
      "learning_rate": 0.0001235294117647059,
      "loss": 6.4783,
      "step": 78
    },
    {
      "epoch": 1.5198555956678699,
      "grad_norm": 7.924820899963379,
      "learning_rate": 0.00012254901960784316,
      "loss": 6.6288,
      "step": 79
    },
    {
      "epoch": 1.5391095066185319,
      "grad_norm": 6.946601390838623,
      "learning_rate": 0.00012156862745098039,
      "loss": 6.4085,
      "step": 80
    },
    {
      "epoch": 1.5583634175691938,
      "grad_norm": 10.120043754577637,
      "learning_rate": 0.00012058823529411765,
      "loss": 6.4667,
      "step": 81
    },
    {
      "epoch": 1.5776173285198556,
      "grad_norm": 9.635017395019531,
      "learning_rate": 0.0001196078431372549,
      "loss": 6.3742,
      "step": 82
    },
    {
      "epoch": 1.5968712394705173,
      "grad_norm": 6.578627586364746,
      "learning_rate": 0.00011862745098039216,
      "loss": 6.1956,
      "step": 83
    },
    {
      "epoch": 1.6161251504211793,
      "grad_norm": 18.30640983581543,
      "learning_rate": 0.00011764705882352942,
      "loss": 6.4804,
      "step": 84
    },
    {
      "epoch": 1.6353790613718413,
      "grad_norm": 11.166876792907715,
      "learning_rate": 0.00011666666666666668,
      "loss": 6.4495,
      "step": 85
    },
    {
      "epoch": 1.654632972322503,
      "grad_norm": 8.15738582611084,
      "learning_rate": 0.00011568627450980394,
      "loss": 6.1371,
      "step": 86
    },
    {
      "epoch": 1.6738868832731648,
      "grad_norm": 9.473989486694336,
      "learning_rate": 0.00011470588235294118,
      "loss": 6.366,
      "step": 87
    },
    {
      "epoch": 1.6931407942238268,
      "grad_norm": 16.634380340576172,
      "learning_rate": 0.00011372549019607843,
      "loss": 6.1748,
      "step": 88
    },
    {
      "epoch": 1.7123947051744886,
      "grad_norm": 20.92518424987793,
      "learning_rate": 0.0001127450980392157,
      "loss": 6.0918,
      "step": 89
    },
    {
      "epoch": 1.7316486161251503,
      "grad_norm": 10.186667442321777,
      "learning_rate": 0.00011176470588235294,
      "loss": 6.1072,
      "step": 90
    },
    {
      "epoch": 1.7509025270758123,
      "grad_norm": 21.300180435180664,
      "learning_rate": 0.00011078431372549021,
      "loss": 6.724,
      "step": 91
    },
    {
      "epoch": 1.7701564380264743,
      "grad_norm": 17.833845138549805,
      "learning_rate": 0.00010980392156862746,
      "loss": 6.2231,
      "step": 92
    },
    {
      "epoch": 1.789410348977136,
      "grad_norm": 12.850127220153809,
      "learning_rate": 0.0001088235294117647,
      "loss": 6.4846,
      "step": 93
    },
    {
      "epoch": 1.8086642599277978,
      "grad_norm": 16.229764938354492,
      "learning_rate": 0.00010784313725490196,
      "loss": 6.6046,
      "step": 94
    },
    {
      "epoch": 1.8279181708784598,
      "grad_norm": 41.6049690246582,
      "learning_rate": 0.00010686274509803922,
      "loss": 6.5044,
      "step": 95
    },
    {
      "epoch": 1.8471720818291215,
      "grad_norm": 8.0320463180542,
      "learning_rate": 0.00010588235294117647,
      "loss": 6.4836,
      "step": 96
    },
    {
      "epoch": 1.8664259927797833,
      "grad_norm": 19.129127502441406,
      "learning_rate": 0.00010490196078431374,
      "loss": 6.1962,
      "step": 97
    },
    {
      "epoch": 1.8856799037304453,
      "grad_norm": 14.464997291564941,
      "learning_rate": 0.00010392156862745099,
      "loss": 6.2694,
      "step": 98
    },
    {
      "epoch": 1.9049338146811072,
      "grad_norm": 25.245752334594727,
      "learning_rate": 0.00010294117647058823,
      "loss": 6.0148,
      "step": 99
    },
    {
      "epoch": 1.924187725631769,
      "grad_norm": 12.66399097442627,
      "learning_rate": 0.00010196078431372549,
      "loss": 6.1879,
      "step": 100
    },
    {
      "epoch": 1.924187725631769,
      "eval_clap": 0.12328307330608368,
      "eval_loss": 5.896579742431641,
      "eval_runtime": 165.5834,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 100
    },
    {
      "epoch": 1.9434416365824307,
      "grad_norm": 12.162952423095703,
      "learning_rate": 0.00010098039215686274,
      "loss": 6.1875,
      "step": 101
    },
    {
      "epoch": 1.9626955475330927,
      "grad_norm": 16.754629135131836,
      "learning_rate": 0.0001,
      "loss": 6.5483,
      "step": 102
    },
    {
      "epoch": 1.9819494584837545,
      "grad_norm": 9.804841995239258,
      "learning_rate": 9.901960784313727e-05,
      "loss": 6.0631,
      "step": 103
    },
    {
      "epoch": 2.0,
      "grad_norm": 26.169551849365234,
      "learning_rate": 9.80392156862745e-05,
      "loss": 6.3384,
      "step": 104
    },
    {
      "epoch": 2.019253910950662,
      "grad_norm": 22.054380416870117,
      "learning_rate": 9.705882352941177e-05,
      "loss": 6.5192,
      "step": 105
    },
    {
      "epoch": 2.0385078219013235,
      "grad_norm": 13.319371223449707,
      "learning_rate": 9.607843137254903e-05,
      "loss": 6.1904,
      "step": 106
    },
    {
      "epoch": 2.0577617328519855,
      "grad_norm": 13.158707618713379,
      "learning_rate": 9.509803921568627e-05,
      "loss": 6.4906,
      "step": 107
    },
    {
      "epoch": 2.0770156438026475,
      "grad_norm": 7.972289562225342,
      "learning_rate": 9.411764705882353e-05,
      "loss": 6.4551,
      "step": 108
    },
    {
      "epoch": 2.0962695547533094,
      "grad_norm": 14.052528381347656,
      "learning_rate": 9.313725490196079e-05,
      "loss": 6.2028,
      "step": 109
    },
    {
      "epoch": 2.115523465703971,
      "grad_norm": 21.128631591796875,
      "learning_rate": 9.215686274509804e-05,
      "loss": 6.121,
      "step": 110
    },
    {
      "epoch": 2.134777376654633,
      "grad_norm": 9.11488151550293,
      "learning_rate": 9.11764705882353e-05,
      "loss": 6.559,
      "step": 111
    },
    {
      "epoch": 2.154031287605295,
      "grad_norm": 10.081767082214355,
      "learning_rate": 9.019607843137255e-05,
      "loss": 6.4236,
      "step": 112
    },
    {
      "epoch": 2.1732851985559565,
      "grad_norm": 7.397235870361328,
      "learning_rate": 8.921568627450981e-05,
      "loss": 6.5415,
      "step": 113
    },
    {
      "epoch": 2.1925391095066185,
      "grad_norm": 9.652939796447754,
      "learning_rate": 8.823529411764706e-05,
      "loss": 6.3744,
      "step": 114
    },
    {
      "epoch": 2.2117930204572804,
      "grad_norm": 12.823005676269531,
      "learning_rate": 8.725490196078432e-05,
      "loss": 5.9683,
      "step": 115
    },
    {
      "epoch": 2.2310469314079424,
      "grad_norm": 9.981169700622559,
      "learning_rate": 8.627450980392158e-05,
      "loss": 6.2714,
      "step": 116
    },
    {
      "epoch": 2.250300842358604,
      "grad_norm": 11.026590347290039,
      "learning_rate": 8.529411764705883e-05,
      "loss": 6.1287,
      "step": 117
    },
    {
      "epoch": 2.269554753309266,
      "grad_norm": 14.469505310058594,
      "learning_rate": 8.431372549019608e-05,
      "loss": 6.2634,
      "step": 118
    },
    {
      "epoch": 2.288808664259928,
      "grad_norm": 10.639300346374512,
      "learning_rate": 8.333333333333334e-05,
      "loss": 6.1014,
      "step": 119
    },
    {
      "epoch": 2.30806257521059,
      "grad_norm": 10.407938003540039,
      "learning_rate": 8.23529411764706e-05,
      "loss": 6.2487,
      "step": 120
    },
    {
      "epoch": 2.3273164861612514,
      "grad_norm": 18.310867309570312,
      "learning_rate": 8.137254901960785e-05,
      "loss": 6.025,
      "step": 121
    },
    {
      "epoch": 2.3465703971119134,
      "grad_norm": 13.314108848571777,
      "learning_rate": 8.039215686274511e-05,
      "loss": 6.1319,
      "step": 122
    },
    {
      "epoch": 2.3658243080625754,
      "grad_norm": 12.528412818908691,
      "learning_rate": 7.941176470588235e-05,
      "loss": 6.27,
      "step": 123
    },
    {
      "epoch": 2.385078219013237,
      "grad_norm": 10.71603775024414,
      "learning_rate": 7.843137254901961e-05,
      "loss": 6.4118,
      "step": 124
    },
    {
      "epoch": 2.404332129963899,
      "grad_norm": 8.234016418457031,
      "learning_rate": 7.745098039215687e-05,
      "loss": 6.3642,
      "step": 125
    },
    {
      "epoch": 2.404332129963899,
      "eval_clap": 0.10650094598531723,
      "eval_loss": 6.806448936462402,
      "eval_runtime": 165.8182,
      "eval_samples_per_second": 0.096,
      "eval_steps_per_second": 0.096,
      "step": 125
    },
    {
      "epoch": 2.423586040914561,
      "grad_norm": 13.84628963470459,
      "learning_rate": 7.647058823529411e-05,
      "loss": 6.0872,
      "step": 126
    },
    {
      "epoch": 2.4428399518652224,
      "grad_norm": 7.576101779937744,
      "learning_rate": 7.549019607843137e-05,
      "loss": 6.3515,
      "step": 127
    },
    {
      "epoch": 2.4620938628158844,
      "grad_norm": 9.205301284790039,
      "learning_rate": 7.450980392156864e-05,
      "loss": 6.0883,
      "step": 128
    },
    {
      "epoch": 2.4813477737665464,
      "grad_norm": 8.85059928894043,
      "learning_rate": 7.352941176470589e-05,
      "loss": 5.824,
      "step": 129
    },
    {
      "epoch": 2.5006016847172083,
      "grad_norm": 6.963297367095947,
      "learning_rate": 7.254901960784314e-05,
      "loss": 6.4633,
      "step": 130
    },
    {
      "epoch": 2.51985559566787,
      "grad_norm": 6.612102508544922,
      "learning_rate": 7.156862745098039e-05,
      "loss": 6.3979,
      "step": 131
    },
    {
      "epoch": 2.539109506618532,
      "grad_norm": 11.322911262512207,
      "learning_rate": 7.058823529411765e-05,
      "loss": 6.2103,
      "step": 132
    },
    {
      "epoch": 2.558363417569194,
      "grad_norm": 21.0396671295166,
      "learning_rate": 6.96078431372549e-05,
      "loss": 5.6772,
      "step": 133
    },
    {
      "epoch": 2.577617328519856,
      "grad_norm": 13.040122985839844,
      "learning_rate": 6.862745098039216e-05,
      "loss": 6.0072,
      "step": 134
    },
    {
      "epoch": 2.5968712394705173,
      "grad_norm": 13.392056465148926,
      "learning_rate": 6.764705882352942e-05,
      "loss": 6.0408,
      "step": 135
    },
    {
      "epoch": 2.6161251504211793,
      "grad_norm": 9.345407485961914,
      "learning_rate": 6.666666666666667e-05,
      "loss": 6.345,
      "step": 136
    },
    {
      "epoch": 2.6353790613718413,
      "grad_norm": 9.068965911865234,
      "learning_rate": 6.568627450980392e-05,
      "loss": 6.0518,
      "step": 137
    },
    {
      "epoch": 2.654632972322503,
      "grad_norm": 9.924796104431152,
      "learning_rate": 6.470588235294118e-05,
      "loss": 6.404,
      "step": 138
    },
    {
      "epoch": 2.673886883273165,
      "grad_norm": 11.512860298156738,
      "learning_rate": 6.372549019607843e-05,
      "loss": 5.849,
      "step": 139
    },
    {
      "epoch": 2.693140794223827,
      "grad_norm": 9.558600425720215,
      "learning_rate": 6.274509803921569e-05,
      "loss": 6.0751,
      "step": 140
    },
    {
      "epoch": 2.7123947051744883,
      "grad_norm": 14.465291976928711,
      "learning_rate": 6.176470588235295e-05,
      "loss": 5.5432,
      "step": 141
    },
    {
      "epoch": 2.7316486161251503,
      "grad_norm": 14.843960762023926,
      "learning_rate": 6.078431372549019e-05,
      "loss": 5.8858,
      "step": 142
    },
    {
      "epoch": 2.7509025270758123,
      "grad_norm": 8.04920768737793,
      "learning_rate": 5.980392156862745e-05,
      "loss": 5.8131,
      "step": 143
    },
    {
      "epoch": 2.7701564380264743,
      "grad_norm": 9.71105670928955,
      "learning_rate": 5.882352941176471e-05,
      "loss": 5.9374,
      "step": 144
    },
    {
      "epoch": 2.7894103489771362,
      "grad_norm": 5.949017524719238,
      "learning_rate": 5.784313725490197e-05,
      "loss": 6.4545,
      "step": 145
    },
    {
      "epoch": 2.808664259927798,
      "grad_norm": 7.233414649963379,
      "learning_rate": 5.6862745098039215e-05,
      "loss": 6.1215,
      "step": 146
    },
    {
      "epoch": 2.8279181708784598,
      "grad_norm": 9.445034980773926,
      "learning_rate": 5.588235294117647e-05,
      "loss": 5.7711,
      "step": 147
    },
    {
      "epoch": 2.8471720818291217,
      "grad_norm": 6.351881980895996,
      "learning_rate": 5.490196078431373e-05,
      "loss": 6.3073,
      "step": 148
    },
    {
      "epoch": 2.8664259927797833,
      "grad_norm": 5.955877304077148,
      "learning_rate": 5.392156862745098e-05,
      "loss": 6.2675,
      "step": 149
    },
    {
      "epoch": 2.8856799037304453,
      "grad_norm": 7.2687764167785645,
      "learning_rate": 5.294117647058824e-05,
      "loss": 6.2382,
      "step": 150
    },
    {
      "epoch": 2.8856799037304453,
      "eval_clap": 0.07656023651361465,
      "eval_loss": 6.118464469909668,
      "eval_runtime": 165.7635,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 150
    },
    {
      "epoch": 2.9049338146811072,
      "grad_norm": 7.581653594970703,
      "learning_rate": 5.1960784313725495e-05,
      "loss": 6.1951,
      "step": 151
    },
    {
      "epoch": 2.9241877256317688,
      "grad_norm": 5.309889793395996,
      "learning_rate": 5.0980392156862745e-05,
      "loss": 6.1416,
      "step": 152
    },
    {
      "epoch": 2.9434416365824307,
      "grad_norm": 10.804561614990234,
      "learning_rate": 5e-05,
      "loss": 6.4203,
      "step": 153
    },
    {
      "epoch": 2.9626955475330927,
      "grad_norm": 7.452890872955322,
      "learning_rate": 4.901960784313725e-05,
      "loss": 6.3695,
      "step": 154
    },
    {
      "epoch": 2.9819494584837543,
      "grad_norm": 7.373142719268799,
      "learning_rate": 4.803921568627452e-05,
      "loss": 6.0469,
      "step": 155
    },
    {
      "epoch": 3.0,
      "grad_norm": 6.503188610076904,
      "learning_rate": 4.705882352941177e-05,
      "loss": 5.5774,
      "step": 156
    },
    {
      "epoch": 3.019253910950662,
      "grad_norm": 6.571235656738281,
      "learning_rate": 4.607843137254902e-05,
      "loss": 6.3784,
      "step": 157
    },
    {
      "epoch": 3.0385078219013235,
      "grad_norm": 6.059790134429932,
      "learning_rate": 4.5098039215686275e-05,
      "loss": 6.2638,
      "step": 158
    },
    {
      "epoch": 3.0577617328519855,
      "grad_norm": 7.978560447692871,
      "learning_rate": 4.411764705882353e-05,
      "loss": 6.2388,
      "step": 159
    },
    {
      "epoch": 3.0770156438026475,
      "grad_norm": 4.5174479484558105,
      "learning_rate": 4.313725490196079e-05,
      "loss": 6.1811,
      "step": 160
    },
    {
      "epoch": 3.0962695547533094,
      "grad_norm": 16.497093200683594,
      "learning_rate": 4.215686274509804e-05,
      "loss": 5.8567,
      "step": 161
    },
    {
      "epoch": 3.115523465703971,
      "grad_norm": 10.036762237548828,
      "learning_rate": 4.11764705882353e-05,
      "loss": 5.7851,
      "step": 162
    },
    {
      "epoch": 3.134777376654633,
      "grad_norm": 8.312905311584473,
      "learning_rate": 4.0196078431372555e-05,
      "loss": 6.3701,
      "step": 163
    },
    {
      "epoch": 3.154031287605295,
      "grad_norm": 6.305182456970215,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 6.2461,
      "step": 164
    },
    {
      "epoch": 3.1732851985559565,
      "grad_norm": 6.297240257263184,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 6.1583,
      "step": 165
    },
    {
      "epoch": 3.1925391095066185,
      "grad_norm": 6.377700328826904,
      "learning_rate": 3.725490196078432e-05,
      "loss": 5.8368,
      "step": 166
    },
    {
      "epoch": 3.2117930204572804,
      "grad_norm": 6.20255708694458,
      "learning_rate": 3.627450980392157e-05,
      "loss": 6.1394,
      "step": 167
    },
    {
      "epoch": 3.2310469314079424,
      "grad_norm": 10.172269821166992,
      "learning_rate": 3.529411764705883e-05,
      "loss": 5.99,
      "step": 168
    },
    {
      "epoch": 3.250300842358604,
      "grad_norm": 12.56449031829834,
      "learning_rate": 3.431372549019608e-05,
      "loss": 6.2823,
      "step": 169
    },
    {
      "epoch": 3.269554753309266,
      "grad_norm": 6.517347812652588,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 6.4417,
      "step": 170
    },
    {
      "epoch": 3.288808664259928,
      "grad_norm": 7.165337085723877,
      "learning_rate": 3.235294117647059e-05,
      "loss": 6.1048,
      "step": 171
    },
    {
      "epoch": 3.30806257521059,
      "grad_norm": 14.79480266571045,
      "learning_rate": 3.137254901960784e-05,
      "loss": 5.9012,
      "step": 172
    },
    {
      "epoch": 3.3273164861612514,
      "grad_norm": 10.55307388305664,
      "learning_rate": 3.0392156862745097e-05,
      "loss": 6.0419,
      "step": 173
    },
    {
      "epoch": 3.3465703971119134,
      "grad_norm": 7.354953289031982,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 5.9871,
      "step": 174
    },
    {
      "epoch": 3.3658243080625754,
      "grad_norm": 7.013256549835205,
      "learning_rate": 2.8431372549019608e-05,
      "loss": 6.3169,
      "step": 175
    },
    {
      "epoch": 3.3658243080625754,
      "eval_clap": 0.09689466655254364,
      "eval_loss": 6.116217613220215,
      "eval_runtime": 165.7689,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 175
    },
    {
      "epoch": 3.385078219013237,
      "grad_norm": 8.007953643798828,
      "learning_rate": 2.7450980392156865e-05,
      "loss": 6.0573,
      "step": 176
    },
    {
      "epoch": 3.404332129963899,
      "grad_norm": 7.166982173919678,
      "learning_rate": 2.647058823529412e-05,
      "loss": 6.3097,
      "step": 177
    },
    {
      "epoch": 3.423586040914561,
      "grad_norm": 5.868830680847168,
      "learning_rate": 2.5490196078431373e-05,
      "loss": 6.1856,
      "step": 178
    },
    {
      "epoch": 3.4428399518652224,
      "grad_norm": 7.172518253326416,
      "learning_rate": 2.4509803921568626e-05,
      "loss": 6.284,
      "step": 179
    },
    {
      "epoch": 3.4620938628158844,
      "grad_norm": 5.972955226898193,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 6.1067,
      "step": 180
    },
    {
      "epoch": 3.4813477737665464,
      "grad_norm": 5.716938495635986,
      "learning_rate": 2.2549019607843138e-05,
      "loss": 6.2792,
      "step": 181
    },
    {
      "epoch": 3.5006016847172083,
      "grad_norm": 5.647866249084473,
      "learning_rate": 2.1568627450980395e-05,
      "loss": 6.336,
      "step": 182
    },
    {
      "epoch": 3.51985559566787,
      "grad_norm": 7.596288204193115,
      "learning_rate": 2.058823529411765e-05,
      "loss": 6.1188,
      "step": 183
    },
    {
      "epoch": 3.539109506618532,
      "grad_norm": 9.767680168151855,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 6.3607,
      "step": 184
    },
    {
      "epoch": 3.558363417569194,
      "grad_norm": 5.301209926605225,
      "learning_rate": 1.862745098039216e-05,
      "loss": 6.0671,
      "step": 185
    },
    {
      "epoch": 3.577617328519856,
      "grad_norm": 6.347781658172607,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 6.1538,
      "step": 186
    },
    {
      "epoch": 3.5968712394705173,
      "grad_norm": 6.653684139251709,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 6.1422,
      "step": 187
    },
    {
      "epoch": 3.6161251504211793,
      "grad_norm": 9.340754508972168,
      "learning_rate": 1.568627450980392e-05,
      "loss": 5.6681,
      "step": 188
    },
    {
      "epoch": 3.6353790613718413,
      "grad_norm": 6.159310340881348,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 5.8408,
      "step": 189
    },
    {
      "epoch": 3.654632972322503,
      "grad_norm": 7.5495195388793945,
      "learning_rate": 1.3725490196078432e-05,
      "loss": 6.1853,
      "step": 190
    },
    {
      "epoch": 3.673886883273165,
      "grad_norm": 6.215287208557129,
      "learning_rate": 1.2745098039215686e-05,
      "loss": 6.082,
      "step": 191
    },
    {
      "epoch": 3.693140794223827,
      "grad_norm": 5.863905906677246,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 6.0772,
      "step": 192
    },
    {
      "epoch": 3.7123947051744883,
      "grad_norm": 5.785052299499512,
      "learning_rate": 1.0784313725490197e-05,
      "loss": 6.2809,
      "step": 193
    },
    {
      "epoch": 3.7316486161251503,
      "grad_norm": 8.62579345703125,
      "learning_rate": 9.803921568627451e-06,
      "loss": 5.9173,
      "step": 194
    },
    {
      "epoch": 3.7509025270758123,
      "grad_norm": 8.095368385314941,
      "learning_rate": 8.823529411764707e-06,
      "loss": 6.2614,
      "step": 195
    },
    {
      "epoch": 3.7701564380264743,
      "grad_norm": 6.416041851043701,
      "learning_rate": 7.84313725490196e-06,
      "loss": 5.7276,
      "step": 196
    },
    {
      "epoch": 3.7894103489771362,
      "grad_norm": 6.0362868309021,
      "learning_rate": 6.862745098039216e-06,
      "loss": 6.1875,
      "step": 197
    },
    {
      "epoch": 3.808664259927798,
      "grad_norm": 6.641626834869385,
      "learning_rate": 5.882352941176471e-06,
      "loss": 6.0641,
      "step": 198
    },
    {
      "epoch": 3.8279181708784598,
      "grad_norm": 6.249925136566162,
      "learning_rate": 4.901960784313726e-06,
      "loss": 6.4255,
      "step": 199
    },
    {
      "epoch": 3.8471720818291217,
      "grad_norm": 7.856912136077881,
      "learning_rate": 3.92156862745098e-06,
      "loss": 5.7667,
      "step": 200
    },
    {
      "epoch": 3.8471720818291217,
      "eval_clap": 0.11432015895843506,
      "eval_loss": 6.130455017089844,
      "eval_runtime": 165.7823,
      "eval_samples_per_second": 0.097,
      "eval_steps_per_second": 0.097,
      "step": 200
    },
    {
      "epoch": 3.8664259927797833,
      "grad_norm": 8.209946632385254,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 6.1598,
      "step": 201
    },
    {
      "epoch": 3.8856799037304453,
      "grad_norm": 7.541530609130859,
      "learning_rate": 1.96078431372549e-06,
      "loss": 5.7201,
      "step": 202
    },
    {
      "epoch": 3.9049338146811072,
      "grad_norm": 36.531105041503906,
      "learning_rate": 9.80392156862745e-07,
      "loss": 6.0873,
      "step": 203
    },
    {
      "epoch": 3.9241877256317688,
      "grad_norm": 6.220560073852539,
      "learning_rate": 0.0,
      "loss": 6.0892,
      "step": 204
    },
    {
      "epoch": 3.9241877256317688,
      "step": 204,
      "total_flos": 784195045500888.0,
      "train_loss": 6.39456293629665,
      "train_runtime": 14405.0011,
      "train_samples_per_second": 0.231,
      "train_steps_per_second": 0.014
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 204,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 784195045500888.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}