Training in progress, step 600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 349243752
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2634d7d437e55d120f9cbfc1a0a647ba5e43707198ec229840f3cbd4c2c6010d
|
3 |
size 349243752
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 177909253
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77d6a6963848ad083ec9f75340262560feabd272bc333bcca7686444ebcc3703
|
3 |
size 177909253
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb0ee16dc80f0a35b0d265d0738a01e60e4a6bc16d147140edaf591ee6a31fd0
|
3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:326d14f5288c1d48dfb46fecae7e18839fcfd032bd3fac00e22ed4ca25616087
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -358,6 +358,76 @@
|
|
358 |
"learning_rate": 1.196412859476037e-05,
|
359 |
"loss": 1.4087,
|
360 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
}
|
362 |
],
|
363 |
"logging_steps": 10,
|
@@ -377,7 +447,7 @@
|
|
377 |
"attributes": {}
|
378 |
}
|
379 |
},
|
380 |
-
"total_flos":
|
381 |
"train_batch_size": 24,
|
382 |
"trial_name": null,
|
383 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.5970149253731343,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
358 |
"learning_rate": 1.196412859476037e-05,
|
359 |
"loss": 1.4087,
|
360 |
"step": 500
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"epoch": 0.5074626865671642,
|
364 |
+
"grad_norm": 0.15541517734527588,
|
365 |
+
"learning_rate": 1.1961691644685907e-05,
|
366 |
+
"loss": 1.3638,
|
367 |
+
"step": 510
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"epoch": 0.5174129353233831,
|
371 |
+
"grad_norm": 0.14922229945659637,
|
372 |
+
"learning_rate": 1.195917487915494e-05,
|
373 |
+
"loss": 1.4186,
|
374 |
+
"step": 520
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"epoch": 0.527363184079602,
|
378 |
+
"grad_norm": 0.1509064882993698,
|
379 |
+
"learning_rate": 1.1956578331862066e-05,
|
380 |
+
"loss": 1.3244,
|
381 |
+
"step": 530
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"epoch": 0.5373134328358209,
|
385 |
+
"grad_norm": 0.15153075754642487,
|
386 |
+
"learning_rate": 1.1953902037570002e-05,
|
387 |
+
"loss": 1.3692,
|
388 |
+
"step": 540
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"epoch": 0.5472636815920398,
|
392 |
+
"grad_norm": 0.18627804517745972,
|
393 |
+
"learning_rate": 1.1951146032109126e-05,
|
394 |
+
"loss": 1.3707,
|
395 |
+
"step": 550
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"epoch": 0.5572139303482587,
|
399 |
+
"grad_norm": 0.1417003720998764,
|
400 |
+
"learning_rate": 1.1948310352376988e-05,
|
401 |
+
"loss": 1.3976,
|
402 |
+
"step": 560
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"epoch": 0.5671641791044776,
|
406 |
+
"grad_norm": 0.14316676557064056,
|
407 |
+
"learning_rate": 1.1945395036337829e-05,
|
408 |
+
"loss": 1.4397,
|
409 |
+
"step": 570
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"epoch": 0.5771144278606966,
|
413 |
+
"grad_norm": 0.11908440291881561,
|
414 |
+
"learning_rate": 1.1942400123022057e-05,
|
415 |
+
"loss": 1.3614,
|
416 |
+
"step": 580
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 0.5870646766169154,
|
420 |
+
"grad_norm": 0.15239761769771576,
|
421 |
+
"learning_rate": 1.1939325652525737e-05,
|
422 |
+
"loss": 1.4346,
|
423 |
+
"step": 590
|
424 |
+
},
|
425 |
+
{
|
426 |
+
"epoch": 0.5970149253731343,
|
427 |
+
"grad_norm": 0.14442642033100128,
|
428 |
+
"learning_rate": 1.193617166601005e-05,
|
429 |
+
"loss": 1.3687,
|
430 |
+
"step": 600
|
431 |
}
|
432 |
],
|
433 |
"logging_steps": 10,
|
|
|
447 |
"attributes": {}
|
448 |
}
|
449 |
},
|
450 |
+
"total_flos": 4.5084597833170944e+17,
|
451 |
"train_batch_size": 24,
|
452 |
"trial_name": null,
|
453 |
"trial_params": null
|