Training in progress, step 1540, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 671149168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41429cf5571ce18285a141d961caa3f0f825a72c254ea213d3abcf32d26cf102
|
3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 511723621
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:944e69c6bc8e57b2bf73b9346c469498e8ae2c8baaba15889acd8b4c7669601e
|
3 |
size 511723621
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4a52571f49e27c7eab31d97001843fe6934ba9e93fcb85a79308d038f084c2a
|
3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45489b655fbbe1ef592059bf1880ab60c4a3129300be9869b9bec372ecbdf27e
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"best_global_step":
|
3 |
-
"best_metric": 1.
|
4 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 20,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -11264,6 +11264,154 @@
|
|
11264 |
"eval_samples_per_second": 2.182,
|
11265 |
"eval_steps_per_second": 1.091,
|
11266 |
"step": 1520
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11267 |
}
|
11268 |
],
|
11269 |
"logging_steps": 1,
|
@@ -11278,7 +11426,7 @@
|
|
11278 |
"early_stopping_threshold": 0.0
|
11279 |
},
|
11280 |
"attributes": {
|
11281 |
-
"early_stopping_patience_counter":
|
11282 |
}
|
11283 |
},
|
11284 |
"TrainerControl": {
|
@@ -11292,7 +11440,7 @@
|
|
11292 |
"attributes": {}
|
11293 |
}
|
11294 |
},
|
11295 |
-
"total_flos": 2.
|
11296 |
"train_batch_size": 2,
|
11297 |
"trial_name": null,
|
11298 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_global_step": 1540,
|
3 |
+
"best_metric": 1.8535445928573608,
|
4 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-1540",
|
5 |
+
"epoch": 0.08761449621664676,
|
6 |
"eval_steps": 20,
|
7 |
+
"global_step": 1540,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
11264 |
"eval_samples_per_second": 2.182,
|
11265 |
"eval_steps_per_second": 1.091,
|
11266 |
"step": 1520
|
11267 |
+
},
|
11268 |
+
{
|
11269 |
+
"epoch": 0.08653353814644137,
|
11270 |
+
"grad_norm": 0.43319040536880493,
|
11271 |
+
"learning_rate": 0.0001995954582663373,
|
11272 |
+
"loss": 1.9505,
|
11273 |
+
"step": 1521
|
11274 |
+
},
|
11275 |
+
{
|
11276 |
+
"epoch": 0.08659043067645218,
|
11277 |
+
"grad_norm": 0.457925409078598,
|
11278 |
+
"learning_rate": 0.00019959492263420728,
|
11279 |
+
"loss": 1.7886,
|
11280 |
+
"step": 1522
|
11281 |
+
},
|
11282 |
+
{
|
11283 |
+
"epoch": 0.086647323206463,
|
11284 |
+
"grad_norm": 0.4349290728569031,
|
11285 |
+
"learning_rate": 0.00019959438664843054,
|
11286 |
+
"loss": 1.9586,
|
11287 |
+
"step": 1523
|
11288 |
+
},
|
11289 |
+
{
|
11290 |
+
"epoch": 0.0867042157364738,
|
11291 |
+
"grad_norm": 0.4642198979854584,
|
11292 |
+
"learning_rate": 0.0001995938503090089,
|
11293 |
+
"loss": 1.7877,
|
11294 |
+
"step": 1524
|
11295 |
+
},
|
11296 |
+
{
|
11297 |
+
"epoch": 0.08676110826648462,
|
11298 |
+
"grad_norm": 0.4763820171356201,
|
11299 |
+
"learning_rate": 0.00019959331361594438,
|
11300 |
+
"loss": 1.5194,
|
11301 |
+
"step": 1525
|
11302 |
+
},
|
11303 |
+
{
|
11304 |
+
"epoch": 0.08681800079649542,
|
11305 |
+
"grad_norm": 0.4941880702972412,
|
11306 |
+
"learning_rate": 0.00019959277656923883,
|
11307 |
+
"loss": 1.8568,
|
11308 |
+
"step": 1526
|
11309 |
+
},
|
11310 |
+
{
|
11311 |
+
"epoch": 0.08687489332650623,
|
11312 |
+
"grad_norm": 0.4190940260887146,
|
11313 |
+
"learning_rate": 0.00019959223916889412,
|
11314 |
+
"loss": 1.7104,
|
11315 |
+
"step": 1527
|
11316 |
+
},
|
11317 |
+
{
|
11318 |
+
"epoch": 0.08693178585651704,
|
11319 |
+
"grad_norm": 0.5554546117782593,
|
11320 |
+
"learning_rate": 0.0001995917014149122,
|
11321 |
+
"loss": 1.6767,
|
11322 |
+
"step": 1528
|
11323 |
+
},
|
11324 |
+
{
|
11325 |
+
"epoch": 0.08698867838652785,
|
11326 |
+
"grad_norm": 0.5352862477302551,
|
11327 |
+
"learning_rate": 0.00019959116330729498,
|
11328 |
+
"loss": 1.8475,
|
11329 |
+
"step": 1529
|
11330 |
+
},
|
11331 |
+
{
|
11332 |
+
"epoch": 0.08704557091653865,
|
11333 |
+
"grad_norm": 0.4974481165409088,
|
11334 |
+
"learning_rate": 0.00019959062484604438,
|
11335 |
+
"loss": 1.8802,
|
11336 |
+
"step": 1530
|
11337 |
+
},
|
11338 |
+
{
|
11339 |
+
"epoch": 0.08710246344654947,
|
11340 |
+
"grad_norm": 0.5199314951896667,
|
11341 |
+
"learning_rate": 0.00019959008603116226,
|
11342 |
+
"loss": 1.7377,
|
11343 |
+
"step": 1531
|
11344 |
+
},
|
11345 |
+
{
|
11346 |
+
"epoch": 0.08715935597656027,
|
11347 |
+
"grad_norm": 0.49977415800094604,
|
11348 |
+
"learning_rate": 0.0001995895468626506,
|
11349 |
+
"loss": 1.8371,
|
11350 |
+
"step": 1532
|
11351 |
+
},
|
11352 |
+
{
|
11353 |
+
"epoch": 0.08721624850657109,
|
11354 |
+
"grad_norm": 0.49237269163131714,
|
11355 |
+
"learning_rate": 0.0001995890073405113,
|
11356 |
+
"loss": 1.954,
|
11357 |
+
"step": 1533
|
11358 |
+
},
|
11359 |
+
{
|
11360 |
+
"epoch": 0.0872731410365819,
|
11361 |
+
"grad_norm": 0.4401033818721771,
|
11362 |
+
"learning_rate": 0.00019958846746474622,
|
11363 |
+
"loss": 1.7293,
|
11364 |
+
"step": 1534
|
11365 |
+
},
|
11366 |
+
{
|
11367 |
+
"epoch": 0.0873300335665927,
|
11368 |
+
"grad_norm": 0.5493918061256409,
|
11369 |
+
"learning_rate": 0.00019958792723535733,
|
11370 |
+
"loss": 1.7184,
|
11371 |
+
"step": 1535
|
11372 |
+
},
|
11373 |
+
{
|
11374 |
+
"epoch": 0.08738692609660352,
|
11375 |
+
"grad_norm": 0.4993182420730591,
|
11376 |
+
"learning_rate": 0.00019958738665234655,
|
11377 |
+
"loss": 1.885,
|
11378 |
+
"step": 1536
|
11379 |
+
},
|
11380 |
+
{
|
11381 |
+
"epoch": 0.08744381862661432,
|
11382 |
+
"grad_norm": 0.4622989892959595,
|
11383 |
+
"learning_rate": 0.00019958684571571574,
|
11384 |
+
"loss": 1.6466,
|
11385 |
+
"step": 1537
|
11386 |
+
},
|
11387 |
+
{
|
11388 |
+
"epoch": 0.08750071115662514,
|
11389 |
+
"grad_norm": 0.5067393779754639,
|
11390 |
+
"learning_rate": 0.0001995863044254669,
|
11391 |
+
"loss": 1.664,
|
11392 |
+
"step": 1538
|
11393 |
+
},
|
11394 |
+
{
|
11395 |
+
"epoch": 0.08755760368663594,
|
11396 |
+
"grad_norm": 0.5939873456954956,
|
11397 |
+
"learning_rate": 0.00019958576278160191,
|
11398 |
+
"loss": 2.0013,
|
11399 |
+
"step": 1539
|
11400 |
+
},
|
11401 |
+
{
|
11402 |
+
"epoch": 0.08761449621664676,
|
11403 |
+
"grad_norm": 0.48256736993789673,
|
11404 |
+
"learning_rate": 0.0001995852207841227,
|
11405 |
+
"loss": 1.8087,
|
11406 |
+
"step": 1540
|
11407 |
+
},
|
11408 |
+
{
|
11409 |
+
"epoch": 0.08761449621664676,
|
11410 |
+
"eval_loss": 1.8535445928573608,
|
11411 |
+
"eval_runtime": 129.0453,
|
11412 |
+
"eval_samples_per_second": 2.185,
|
11413 |
+
"eval_steps_per_second": 1.093,
|
11414 |
+
"step": 1540
|
11415 |
}
|
11416 |
],
|
11417 |
"logging_steps": 1,
|
|
|
11426 |
"early_stopping_threshold": 0.0
|
11427 |
},
|
11428 |
"attributes": {
|
11429 |
+
"early_stopping_patience_counter": 0
|
11430 |
}
|
11431 |
},
|
11432 |
"TrainerControl": {
|
|
|
11440 |
"attributes": {}
|
11441 |
}
|
11442 |
},
|
11443 |
+
"total_flos": 2.2037394141845914e+18,
|
11444 |
"train_batch_size": 2,
|
11445 |
"trial_name": null,
|
11446 |
"trial_params": null
|