{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08587376556462001,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0021468441391155,
"grad_norm": 1.7282733917236328,
"learning_rate": 0.0004989265779304422,
"loss": 1.4129,
"step": 10
},
{
"epoch": 0.004293688278231,
"grad_norm": 2.1508498191833496,
"learning_rate": 0.0004978531558608846,
"loss": 1.2225,
"step": 20
},
{
"epoch": 0.006440532417346501,
"grad_norm": 1.6386512517929077,
"learning_rate": 0.0004967797337913268,
"loss": 1.1663,
"step": 30
},
{
"epoch": 0.008587376556462,
"grad_norm": 1.2367421388626099,
"learning_rate": 0.000495706311721769,
"loss": 1.1373,
"step": 40
},
{
"epoch": 0.010734220695577501,
"grad_norm": 1.2300989627838135,
"learning_rate": 0.0004946328896522112,
"loss": 1.1143,
"step": 50
},
{
"epoch": 0.012881064834693002,
"grad_norm": 1.1807990074157715,
"learning_rate": 0.0004935594675826536,
"loss": 1.0937,
"step": 60
},
{
"epoch": 0.015027908973808502,
"grad_norm": 0.9375188946723938,
"learning_rate": 0.0004924860455130958,
"loss": 1.0732,
"step": 70
},
{
"epoch": 0.017174753112924,
"grad_norm": 0.9801538586616516,
"learning_rate": 0.000491412623443538,
"loss": 1.0369,
"step": 80
},
{
"epoch": 0.019321597252039503,
"grad_norm": 0.9229792356491089,
"learning_rate": 0.0004903392013739802,
"loss": 1.0093,
"step": 90
},
{
"epoch": 0.021468441391155002,
"grad_norm": 1.011305570602417,
"learning_rate": 0.0004892657793044225,
"loss": 1.0161,
"step": 100
},
{
"epoch": 0.0236152855302705,
"grad_norm": 0.9356452822685242,
"learning_rate": 0.00048819235723486477,
"loss": 0.9939,
"step": 110
},
{
"epoch": 0.025762129669386003,
"grad_norm": 1.0092449188232422,
"learning_rate": 0.00048711893516530704,
"loss": 0.9647,
"step": 120
},
{
"epoch": 0.027908973808501502,
"grad_norm": 0.9663442373275757,
"learning_rate": 0.0004860455130957492,
"loss": 0.9595,
"step": 130
},
{
"epoch": 0.030055817947617004,
"grad_norm": 1.1502243280410767,
"learning_rate": 0.0004849720910261915,
"loss": 0.9422,
"step": 140
},
{
"epoch": 0.0322026620867325,
"grad_norm": 0.970102846622467,
"learning_rate": 0.00048389866895663376,
"loss": 0.945,
"step": 150
},
{
"epoch": 0.034349506225848,
"grad_norm": 1.2466392517089844,
"learning_rate": 0.00048282524688707604,
"loss": 0.9385,
"step": 160
},
{
"epoch": 0.0364963503649635,
"grad_norm": 1.0010186433792114,
"learning_rate": 0.00048175182481751826,
"loss": 0.9301,
"step": 170
},
{
"epoch": 0.038643194504079006,
"grad_norm": 1.2516905069351196,
"learning_rate": 0.0004806784027479605,
"loss": 0.919,
"step": 180
},
{
"epoch": 0.040790038643194505,
"grad_norm": 0.8497525453567505,
"learning_rate": 0.00047960498067840275,
"loss": 0.9054,
"step": 190
},
{
"epoch": 0.042936882782310004,
"grad_norm": 1.0371205806732178,
"learning_rate": 0.00047853155860884503,
"loss": 0.9109,
"step": 200
},
{
"epoch": 0.0450837269214255,
"grad_norm": 1.3313541412353516,
"learning_rate": 0.00047745813653928725,
"loss": 0.9131,
"step": 210
},
{
"epoch": 0.047230571060541,
"grad_norm": 0.9448315501213074,
"learning_rate": 0.0004763847144697295,
"loss": 0.9014,
"step": 220
},
{
"epoch": 0.04937741519965651,
"grad_norm": 1.274882435798645,
"learning_rate": 0.00047531129240017175,
"loss": 0.8786,
"step": 230
},
{
"epoch": 0.051524259338772006,
"grad_norm": 1.3116368055343628,
"learning_rate": 0.000474237870330614,
"loss": 0.9075,
"step": 240
},
{
"epoch": 0.053671103477887505,
"grad_norm": 0.9970440864562988,
"learning_rate": 0.00047316444826105624,
"loss": 0.8932,
"step": 250
},
{
"epoch": 0.055817947617003004,
"grad_norm": 1.698472499847412,
"learning_rate": 0.0004720910261914985,
"loss": 0.8838,
"step": 260
},
{
"epoch": 0.0579647917561185,
"grad_norm": 1.0129982233047485,
"learning_rate": 0.0004710176041219408,
"loss": 0.8779,
"step": 270
},
{
"epoch": 0.06011163589523401,
"grad_norm": 1.0594947338104248,
"learning_rate": 0.00046994418205238296,
"loss": 0.8631,
"step": 280
},
{
"epoch": 0.06225848003434951,
"grad_norm": 0.7768178582191467,
"learning_rate": 0.00046887075998282524,
"loss": 0.8666,
"step": 290
},
{
"epoch": 0.064405324173465,
"grad_norm": 0.9108049869537354,
"learning_rate": 0.0004677973379132675,
"loss": 0.8676,
"step": 300
},
{
"epoch": 0.06655216831258051,
"grad_norm": 1.4127992391586304,
"learning_rate": 0.0004667239158437098,
"loss": 0.8951,
"step": 310
},
{
"epoch": 0.068699012451696,
"grad_norm": 1.1507939100265503,
"learning_rate": 0.000465650493774152,
"loss": 0.863,
"step": 320
},
{
"epoch": 0.07084585659081151,
"grad_norm": 1.1579265594482422,
"learning_rate": 0.00046457707170459423,
"loss": 0.8716,
"step": 330
},
{
"epoch": 0.072992700729927,
"grad_norm": 0.9873006343841553,
"learning_rate": 0.0004635036496350365,
"loss": 0.8569,
"step": 340
},
{
"epoch": 0.07513954486904251,
"grad_norm": 1.1990203857421875,
"learning_rate": 0.0004624302275654788,
"loss": 0.8776,
"step": 350
},
{
"epoch": 0.07728638900815801,
"grad_norm": 1.1173065900802612,
"learning_rate": 0.000461356805495921,
"loss": 0.865,
"step": 360
},
{
"epoch": 0.0794332331472735,
"grad_norm": 1.2493510246276855,
"learning_rate": 0.0004602833834263633,
"loss": 0.8609,
"step": 370
},
{
"epoch": 0.08158007728638901,
"grad_norm": 1.1254737377166748,
"learning_rate": 0.0004592099613568055,
"loss": 0.8697,
"step": 380
},
{
"epoch": 0.0837269214255045,
"grad_norm": 1.1009331941604614,
"learning_rate": 0.0004581365392872477,
"loss": 0.8653,
"step": 390
},
{
"epoch": 0.08587376556462001,
"grad_norm": 1.3970990180969238,
"learning_rate": 0.00045706311721769,
"loss": 0.8542,
"step": 400
}
],
"logging_steps": 10,
"max_steps": 4658,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 2.976872891771059e+16,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}