{ "best_metric": 0.95821899, "best_model_checkpoint": "/output/v0-20250302-194927/checkpoint-93", "epoch": 0.9993284083277367, "eval_steps": 50, "global_step": 93, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010745466756212223, "grad_norm": 1.4417994022369385, "learning_rate": 2e-05, "loss": 1.6095050573349, "memory(GiB)": 15.84, "step": 1, "token_acc": 0.6413415468856948, "train_speed(iter/s)": 0.384326 }, { "epoch": 0.053727333781061114, "grad_norm": 1.530918836593628, "learning_rate": 0.0001, "loss": 1.64873206615448, "memory(GiB)": 18.0, "step": 5, "token_acc": 0.6540549432472446, "train_speed(iter/s)": 0.487448 }, { "epoch": 0.10745466756212223, "grad_norm": 0.9890601634979248, "learning_rate": 9.92055602168058e-05, "loss": 1.4899453163146972, "memory(GiB)": 18.0, "step": 10, "token_acc": 0.6538794346697433, "train_speed(iter/s)": 0.506675 }, { "epoch": 0.16118200134318333, "grad_norm": 0.5164389610290527, "learning_rate": 9.68474862499881e-05, "loss": 1.166579246520996, "memory(GiB)": 18.0, "step": 15, "token_acc": 0.6806848582129481, "train_speed(iter/s)": 0.506317 }, { "epoch": 0.21490933512424445, "grad_norm": 0.5342268943786621, "learning_rate": 9.300071201038503e-05, "loss": 1.1805984497070312, "memory(GiB)": 19.04, "step": 20, "token_acc": 0.6795674445076836, "train_speed(iter/s)": 0.512768 }, { "epoch": 0.2686366689053056, "grad_norm": 0.4971584677696228, "learning_rate": 8.778747871771292e-05, "loss": 1.141501235961914, "memory(GiB)": 19.04, "step": 25, "token_acc": 0.6755593803786575, "train_speed(iter/s)": 0.512175 }, { "epoch": 0.32236400268636667, "grad_norm": 0.8472046256065369, "learning_rate": 8.13734503690426e-05, "loss": 1.2188076019287108, "memory(GiB)": 19.04, "step": 30, "token_acc": 0.6654403311106002, "train_speed(iter/s)": 0.513303 }, { "epoch": 0.3760913364674278, "grad_norm": 0.593246579170227, "learning_rate": 7.396244933600285e-05, "loss": 1.1748350143432618, "memory(GiB)": 19.04, "step": 35, "token_acc": 0.6680231031766868, "train_speed(iter/s)": 0.516033 }, { "epoch": 0.4298186702484889, "grad_norm": 0.6569189429283142, "learning_rate": 6.578997938075125e-05, "loss": 1.2335172653198243, "memory(GiB)": 19.04, "step": 40, "token_acc": 0.6621774701069856, "train_speed(iter/s)": 0.5181 }, { "epoch": 0.48354600402955006, "grad_norm": 0.5732371211051941, "learning_rate": 5.7115741913664264e-05, "loss": 1.146481990814209, "memory(GiB)": 19.04, "step": 45, "token_acc": 0.6752373417721519, "train_speed(iter/s)": 0.520032 }, { "epoch": 0.5372733378106112, "grad_norm": 0.46477988362312317, "learning_rate": 4.821538330805098e-05, "loss": 1.0264747619628907, "memory(GiB)": 19.04, "step": 50, "token_acc": 0.7175245098039216, "train_speed(iter/s)": 0.52028 }, { "epoch": 0.5372733378106112, "eval_loss": 0.9670917987823486, "eval_runtime": 0.5486, "eval_samples_per_second": 20.052, "eval_steps_per_second": 20.052, "eval_token_acc": 0.6802189210320563, "step": 50 }, { "epoch": 0.5910006715916722, "grad_norm": 0.6402655839920044, "learning_rate": 3.937173552235117e-05, "loss": 1.1301018714904785, "memory(GiB)": 19.04, "step": 55, "token_acc": 0.6768744683436626, "train_speed(iter/s)": 0.518567 }, { "epoch": 0.6447280053727333, "grad_norm": 0.6393471956253052, "learning_rate": 3.086582838174551e-05, "loss": 1.0084449768066406, "memory(GiB)": 19.04, "step": 60, "token_acc": 0.7076395196286205, "train_speed(iter/s)": 0.516553 }, { "epoch": 0.6984553391537945, "grad_norm": 0.5735301375389099, "learning_rate": 2.296795912722014e-05, "loss": 1.1564825057983399, "memory(GiB)": 19.04, "step": 65, "token_acc": 0.6742996034284252, "train_speed(iter/s)": 0.517388 }, { "epoch": 0.7521826729348556, "grad_norm": 0.3896394968032837, "learning_rate": 1.592910302030544e-05, "loss": 1.0521112442016602, "memory(GiB)": 19.04, "step": 70, "token_acc": 0.6966670537684357, "train_speed(iter/s)": 0.517074 }, { "epoch": 0.8059100067159167, "grad_norm": 0.6815643310546875, "learning_rate": 9.972937953781986e-06, "loss": 1.068821334838867, "memory(GiB)": 19.04, "step": 75, "token_acc": 0.7018673535093368, "train_speed(iter/s)": 0.51957 }, { "epoch": 0.8596373404969778, "grad_norm": 0.5303964018821716, "learning_rate": 5.288736507014435e-06, "loss": 1.0596104621887208, "memory(GiB)": 19.04, "step": 80, "token_acc": 0.690947202934626, "train_speed(iter/s)": 0.520646 }, { "epoch": 0.9133646742780389, "grad_norm": 0.40747109055519104, "learning_rate": 2.0253513192751373e-06, "loss": 1.0225066184997558, "memory(GiB)": 19.04, "step": 85, "token_acc": 0.7125467164975975, "train_speed(iter/s)": 0.520666 }, { "epoch": 0.9670920080591001, "grad_norm": 0.5486978888511658, "learning_rate": 2.8648491140513266e-07, "loss": 1.1088424682617188, "memory(GiB)": 19.04, "step": 90, "token_acc": 0.6938313525749858, "train_speed(iter/s)": 0.520914 }, { "epoch": 0.9993284083277367, "eval_loss": 0.9582189917564392, "eval_runtime": 0.5101, "eval_samples_per_second": 21.565, "eval_steps_per_second": 21.565, "eval_token_acc": 0.6817826426896012, "step": 93 } ], "logging_steps": 5, "max_steps": 93, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8416047170248704.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }