llama-3-Korean-8B-r-v-0.1 / trainer_state.json
virnect-rjpark's picture
Upload 11 files
57381c9 verified
raw
history blame
No virus
17.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.42533081285444235,
"eval_steps": 25,
"global_step": 1125,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00945179584120983,
"grad_norm": 0.48126843571662903,
"learning_rate": 0.0002,
"loss": 1.4186,
"step": 25
},
{
"epoch": 0.00945179584120983,
"eval_loss": 1.2822998762130737,
"eval_runtime": 1560.6226,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 25
},
{
"epoch": 0.01890359168241966,
"grad_norm": 0.8693311810493469,
"learning_rate": 0.0002,
"loss": 1.2478,
"step": 50
},
{
"epoch": 0.01890359168241966,
"eval_loss": 1.261049747467041,
"eval_runtime": 1561.6033,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 50
},
{
"epoch": 0.02835538752362949,
"grad_norm": 0.4594016969203949,
"learning_rate": 0.0002,
"loss": 1.1961,
"step": 75
},
{
"epoch": 0.02835538752362949,
"eval_loss": 1.2359907627105713,
"eval_runtime": 1561.4875,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 75
},
{
"epoch": 0.03780718336483932,
"grad_norm": 0.7460442185401917,
"learning_rate": 0.0002,
"loss": 1.245,
"step": 100
},
{
"epoch": 0.03780718336483932,
"eval_loss": 1.2357805967330933,
"eval_runtime": 1561.6317,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 100
},
{
"epoch": 0.04725897920604915,
"grad_norm": 0.37976986169815063,
"learning_rate": 0.0002,
"loss": 1.2213,
"step": 125
},
{
"epoch": 0.04725897920604915,
"eval_loss": 1.2154258489608765,
"eval_runtime": 1561.4032,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 125
},
{
"epoch": 0.05671077504725898,
"grad_norm": 0.6762637495994568,
"learning_rate": 0.0002,
"loss": 1.199,
"step": 150
},
{
"epoch": 0.05671077504725898,
"eval_loss": 1.2192034721374512,
"eval_runtime": 1561.5162,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 150
},
{
"epoch": 0.0661625708884688,
"grad_norm": 0.3414202034473419,
"learning_rate": 0.0002,
"loss": 1.1825,
"step": 175
},
{
"epoch": 0.0661625708884688,
"eval_loss": 1.199916124343872,
"eval_runtime": 1561.7104,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 175
},
{
"epoch": 0.07561436672967864,
"grad_norm": 0.8801635503768921,
"learning_rate": 0.0002,
"loss": 1.1358,
"step": 200
},
{
"epoch": 0.07561436672967864,
"eval_loss": 1.201659083366394,
"eval_runtime": 1561.6394,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 200
},
{
"epoch": 0.08506616257088846,
"grad_norm": 0.31596821546554565,
"learning_rate": 0.0002,
"loss": 1.2173,
"step": 225
},
{
"epoch": 0.08506616257088846,
"eval_loss": 1.18569016456604,
"eval_runtime": 1561.8408,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 225
},
{
"epoch": 0.0945179584120983,
"grad_norm": 0.9426243305206299,
"learning_rate": 0.0002,
"loss": 1.1652,
"step": 250
},
{
"epoch": 0.0945179584120983,
"eval_loss": 1.1847585439682007,
"eval_runtime": 1561.46,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 250
},
{
"epoch": 0.10396975425330812,
"grad_norm": 0.3340831398963928,
"learning_rate": 0.0002,
"loss": 1.1563,
"step": 275
},
{
"epoch": 0.10396975425330812,
"eval_loss": 1.1764663457870483,
"eval_runtime": 1563.551,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 275
},
{
"epoch": 0.11342155009451796,
"grad_norm": 1.1844408512115479,
"learning_rate": 0.0002,
"loss": 1.1976,
"step": 300
},
{
"epoch": 0.11342155009451796,
"eval_loss": 1.182220697402954,
"eval_runtime": 1562.1264,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 300
},
{
"epoch": 0.12287334593572778,
"grad_norm": 0.35529959201812744,
"learning_rate": 0.0002,
"loss": 1.197,
"step": 325
},
{
"epoch": 0.12287334593572778,
"eval_loss": 1.170316219329834,
"eval_runtime": 1561.289,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 325
},
{
"epoch": 0.1323251417769376,
"grad_norm": 0.644234836101532,
"learning_rate": 0.0002,
"loss": 1.1317,
"step": 350
},
{
"epoch": 0.1323251417769376,
"eval_loss": 1.173732876777649,
"eval_runtime": 1561.2179,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 350
},
{
"epoch": 0.14177693761814744,
"grad_norm": 0.38344722986221313,
"learning_rate": 0.0002,
"loss": 1.2229,
"step": 375
},
{
"epoch": 0.14177693761814744,
"eval_loss": 1.1632750034332275,
"eval_runtime": 1562.0523,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 375
},
{
"epoch": 0.15122873345935728,
"grad_norm": 0.709377646446228,
"learning_rate": 0.0002,
"loss": 1.1853,
"step": 400
},
{
"epoch": 0.15122873345935728,
"eval_loss": 1.1692676544189453,
"eval_runtime": 1561.2568,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 400
},
{
"epoch": 0.16068052930056712,
"grad_norm": 0.34974658489227295,
"learning_rate": 0.0002,
"loss": 1.1479,
"step": 425
},
{
"epoch": 0.16068052930056712,
"eval_loss": 1.1600748300552368,
"eval_runtime": 1562.1908,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 425
},
{
"epoch": 0.17013232514177692,
"grad_norm": 0.8809393644332886,
"learning_rate": 0.0002,
"loss": 1.1047,
"step": 450
},
{
"epoch": 0.17013232514177692,
"eval_loss": 1.1649720668792725,
"eval_runtime": 1563.0297,
"eval_samples_per_second": 0.846,
"eval_steps_per_second": 0.212,
"step": 450
},
{
"epoch": 0.17958412098298676,
"grad_norm": 0.319968581199646,
"learning_rate": 0.0002,
"loss": 1.1477,
"step": 475
},
{
"epoch": 0.17958412098298676,
"eval_loss": 1.1558725833892822,
"eval_runtime": 1561.2187,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 475
},
{
"epoch": 0.1890359168241966,
"grad_norm": 0.7769630551338196,
"learning_rate": 0.0002,
"loss": 1.1831,
"step": 500
},
{
"epoch": 0.1890359168241966,
"eval_loss": 1.162941336631775,
"eval_runtime": 1561.4384,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 500
},
{
"epoch": 0.19848771266540643,
"grad_norm": 0.3040992319583893,
"learning_rate": 0.0002,
"loss": 1.134,
"step": 525
},
{
"epoch": 0.19848771266540643,
"eval_loss": 1.153849720954895,
"eval_runtime": 1561.176,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 525
},
{
"epoch": 0.20793950850661624,
"grad_norm": 0.656995415687561,
"learning_rate": 0.0002,
"loss": 1.1366,
"step": 550
},
{
"epoch": 0.20793950850661624,
"eval_loss": 1.156500220298767,
"eval_runtime": 1561.228,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 550
},
{
"epoch": 0.21739130434782608,
"grad_norm": 0.32160601019859314,
"learning_rate": 0.0002,
"loss": 1.1581,
"step": 575
},
{
"epoch": 0.21739130434782608,
"eval_loss": 1.1488285064697266,
"eval_runtime": 1561.286,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 575
},
{
"epoch": 0.22684310018903592,
"grad_norm": 0.5169605016708374,
"learning_rate": 0.0002,
"loss": 1.1179,
"step": 600
},
{
"epoch": 0.22684310018903592,
"eval_loss": 1.1587059497833252,
"eval_runtime": 1561.443,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 600
},
{
"epoch": 0.23629489603024575,
"grad_norm": 0.3807673156261444,
"learning_rate": 0.0002,
"loss": 1.1654,
"step": 625
},
{
"epoch": 0.23629489603024575,
"eval_loss": 1.146795630455017,
"eval_runtime": 1561.4729,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 625
},
{
"epoch": 0.24574669187145556,
"grad_norm": 1.206275224685669,
"learning_rate": 0.0002,
"loss": 1.1549,
"step": 650
},
{
"epoch": 0.24574669187145556,
"eval_loss": 1.149159550666809,
"eval_runtime": 1561.5158,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 650
},
{
"epoch": 0.2551984877126654,
"grad_norm": 0.3218563497066498,
"learning_rate": 0.0002,
"loss": 1.147,
"step": 675
},
{
"epoch": 0.2551984877126654,
"eval_loss": 1.1431602239608765,
"eval_runtime": 1561.424,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 675
},
{
"epoch": 0.2646502835538752,
"grad_norm": 0.7758462429046631,
"learning_rate": 0.0002,
"loss": 1.1113,
"step": 700
},
{
"epoch": 0.2646502835538752,
"eval_loss": 1.1470929384231567,
"eval_runtime": 1561.3413,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 700
},
{
"epoch": 0.2741020793950851,
"grad_norm": 0.3400532901287079,
"learning_rate": 0.0002,
"loss": 1.1684,
"step": 725
},
{
"epoch": 0.2741020793950851,
"eval_loss": 1.1409646272659302,
"eval_runtime": 1561.1615,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 725
},
{
"epoch": 0.2835538752362949,
"grad_norm": 0.48636239767074585,
"learning_rate": 0.0002,
"loss": 1.1016,
"step": 750
},
{
"epoch": 0.2835538752362949,
"eval_loss": 1.1419570446014404,
"eval_runtime": 1561.247,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 750
},
{
"epoch": 0.29300567107750475,
"grad_norm": 0.3466539978981018,
"learning_rate": 0.0002,
"loss": 1.1589,
"step": 775
},
{
"epoch": 0.29300567107750475,
"eval_loss": 1.137436032295227,
"eval_runtime": 1561.3303,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 775
},
{
"epoch": 0.30245746691871456,
"grad_norm": 1.0184762477874756,
"learning_rate": 0.0002,
"loss": 1.1275,
"step": 800
},
{
"epoch": 0.30245746691871456,
"eval_loss": 1.1429524421691895,
"eval_runtime": 1561.4223,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 800
},
{
"epoch": 0.31190926275992437,
"grad_norm": 0.3569687306880951,
"learning_rate": 0.0002,
"loss": 1.2014,
"step": 825
},
{
"epoch": 0.31190926275992437,
"eval_loss": 1.134521722793579,
"eval_runtime": 1561.5607,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 825
},
{
"epoch": 0.32136105860113423,
"grad_norm": 0.503614068031311,
"learning_rate": 0.0002,
"loss": 1.0947,
"step": 850
},
{
"epoch": 0.32136105860113423,
"eval_loss": 1.1380345821380615,
"eval_runtime": 1561.4636,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 850
},
{
"epoch": 0.33081285444234404,
"grad_norm": 0.4224971532821655,
"learning_rate": 0.0002,
"loss": 1.1505,
"step": 875
},
{
"epoch": 0.33081285444234404,
"eval_loss": 1.1311566829681396,
"eval_runtime": 1561.5445,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 875
},
{
"epoch": 0.34026465028355385,
"grad_norm": 0.6001178026199341,
"learning_rate": 0.0002,
"loss": 1.1121,
"step": 900
},
{
"epoch": 0.34026465028355385,
"eval_loss": 1.1359593868255615,
"eval_runtime": 1561.55,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 900
},
{
"epoch": 0.3497164461247637,
"grad_norm": 0.3645350933074951,
"learning_rate": 0.0002,
"loss": 1.1452,
"step": 925
},
{
"epoch": 0.3497164461247637,
"eval_loss": 1.1279844045639038,
"eval_runtime": 1561.6948,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 925
},
{
"epoch": 0.3591682419659735,
"grad_norm": 0.6315143704414368,
"learning_rate": 0.0002,
"loss": 1.0865,
"step": 950
},
{
"epoch": 0.3591682419659735,
"eval_loss": 1.1323318481445312,
"eval_runtime": 1561.5263,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 950
},
{
"epoch": 0.3686200378071834,
"grad_norm": 0.3632996380329132,
"learning_rate": 0.0002,
"loss": 1.1383,
"step": 975
},
{
"epoch": 0.3686200378071834,
"eval_loss": 1.1256133317947388,
"eval_runtime": 1561.413,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 975
},
{
"epoch": 0.3780718336483932,
"grad_norm": 0.8775736689567566,
"learning_rate": 0.0002,
"loss": 1.1071,
"step": 1000
},
{
"epoch": 0.3780718336483932,
"eval_loss": 1.130606770515442,
"eval_runtime": 1561.5903,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1000
},
{
"epoch": 0.387523629489603,
"grad_norm": 0.32248276472091675,
"learning_rate": 0.0002,
"loss": 1.1603,
"step": 1025
},
{
"epoch": 0.387523629489603,
"eval_loss": 1.122152328491211,
"eval_runtime": 1561.5582,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1025
},
{
"epoch": 0.39697542533081287,
"grad_norm": 1.2496217489242554,
"learning_rate": 0.0002,
"loss": 1.0542,
"step": 1050
},
{
"epoch": 0.39697542533081287,
"eval_loss": 1.129094123840332,
"eval_runtime": 1561.5299,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1050
},
{
"epoch": 0.4064272211720227,
"grad_norm": 0.31586310267448425,
"learning_rate": 0.0002,
"loss": 1.1224,
"step": 1075
},
{
"epoch": 0.4064272211720227,
"eval_loss": 1.1187065839767456,
"eval_runtime": 1561.5901,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1075
},
{
"epoch": 0.4158790170132325,
"grad_norm": 0.944985032081604,
"learning_rate": 0.0002,
"loss": 1.133,
"step": 1100
},
{
"epoch": 0.4158790170132325,
"eval_loss": 1.122226595878601,
"eval_runtime": 1561.6201,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1100
},
{
"epoch": 0.42533081285444235,
"grad_norm": 0.3063657879829407,
"learning_rate": 0.0002,
"loss": 1.1122,
"step": 1125
},
{
"epoch": 0.42533081285444235,
"eval_loss": 1.1147044897079468,
"eval_runtime": 1561.5596,
"eval_samples_per_second": 0.847,
"eval_steps_per_second": 0.212,
"step": 1125
}
],
"logging_steps": 25,
"max_steps": 2645,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"total_flos": 8.654158217045606e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}