Training in progress, step 1960, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100697728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:736ce521e7280bd2c0975bc603cc94348d6eccc9990efd864828bf986063810d
|
3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201541754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5d973aa7ba038a74b7db64dcfe60c4781913554719fcda63eaae556dc3e290b
|
3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f02d6e86658ff6b5c2087c9efdb8f98bc5474610047eb47a7fd80ecf387cb826
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07fafb27edca5562e76184af344027e6c5895e3f1fc53905ab247cd738263f94
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1559,14 +1559,30 @@
|
|
1559 |
"loss": 0.3968,
|
1560 |
"num_input_tokens_seen": 1312434,
|
1561 |
"step": 1940
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1562 |
}
|
1563 |
],
|
1564 |
"logging_steps": 10,
|
1565 |
"max_steps": 2795,
|
1566 |
-
"num_input_tokens_seen":
|
1567 |
"num_train_epochs": 1,
|
1568 |
"save_steps": 20,
|
1569 |
-
"total_flos": 2.
|
1570 |
"train_batch_size": 1,
|
1571 |
"trial_name": null,
|
1572 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7012522361359571,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1960,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1559 |
"loss": 0.3968,
|
1560 |
"num_input_tokens_seen": 1312434,
|
1561 |
"step": 1940
|
1562 |
+
},
|
1563 |
+
{
|
1564 |
+
"epoch": 0.6976744186046512,
|
1565 |
+
"grad_norm": 0.27954959869384766,
|
1566 |
+
"learning_rate": 6.0465116279069765e-05,
|
1567 |
+
"loss": 0.4071,
|
1568 |
+
"num_input_tokens_seen": 1320352,
|
1569 |
+
"step": 1950
|
1570 |
+
},
|
1571 |
+
{
|
1572 |
+
"epoch": 0.7012522361359571,
|
1573 |
+
"grad_norm": 0.36177489161491394,
|
1574 |
+
"learning_rate": 5.974955277280859e-05,
|
1575 |
+
"loss": 0.3683,
|
1576 |
+
"num_input_tokens_seen": 1326200,
|
1577 |
+
"step": 1960
|
1578 |
}
|
1579 |
],
|
1580 |
"logging_steps": 10,
|
1581 |
"max_steps": 2795,
|
1582 |
+
"num_input_tokens_seen": 1326200,
|
1583 |
"num_train_epochs": 1,
|
1584 |
"save_steps": 20,
|
1585 |
+
"total_flos": 2.98215546679296e+16,
|
1586 |
"train_batch_size": 1,
|
1587 |
"trial_name": null,
|
1588 |
"trial_params": null
|