Training in progress, step 1
- adapter_config.json +2 -2
- adapter_model.safetensors +1 -1
- all_results.json +7 -3
- eval_results.json +7 -0
- metrics.json +1 -0
- train_results.json +3 -3
- trainer_state.json +4 -4
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -26,10 +26,10 @@
   "target_modules": [
     "down_proj",
     "o_proj",
+    "gate_proj",
+    "up_proj",
     "v_proj",
     "q_proj",
-    "up_proj",
-    "gate_proj",
     "k_proj"
   ],
   "task_type": "CAUSAL_LM",
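This change only reorders the LoRA target_modules list (gate_proj and up_proj move ahead of v_proj and q_proj); the set of targeted projection layers is unchanged. For context, a minimal sketch of a PEFT LoraConfig that yields a target_modules list like the new one is shown below; the rank, alpha, and dropout values are illustrative assumptions, since the diff only shows target_modules and task_type.

# Minimal sketch of a PEFT LoraConfig matching the updated target_modules list.
# r, lora_alpha, and lora_dropout are assumed values, not taken from this diff.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,              # assumed rank
    lora_alpha=16,     # assumed scaling factor
    lora_dropout=0.0,  # assumed dropout
    target_modules=[
        "down_proj", "o_proj", "gate_proj", "up_proj",
        "v_proj", "q_proj", "k_proj",
    ],
    task_type="CAUSAL_LM",
)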
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c630bd460b5a5877a85393ea520a3c39785eedb1cd146f02eb0d896e89c4a2b6
 size 844181408
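The adapter weights are stored via Git LFS, so the diff touches only the pointer file: the sha256 oid changes while the size stays at 844181408 bytes. A small sketch for checking a downloaded file against the oid in such a pointer, using only the standard library (the local path is a hypothetical example):

# Sketch: verify a downloaded file against the sha256 oid in its Git LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "c630bd460b5a5877a85393ea520a3c39785eedb1cd146f02eb0d896e89c4a2b6"  # oid from the new pointer
assert sha256_of("adapter_model.safetensors") == expected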
all_results.json
CHANGED
@@ -1,8 +1,12 @@
 {
     "epoch": 0.001813647698934482,
+    "eval_loss": 1.7385717630386353,
+    "eval_runtime": 1254.5458,
+    "eval_samples_per_second": 0.797,
+    "eval_steps_per_second": 0.797,
     "total_flos": 333765997756416.0,
     "train_loss": 1.5015426874160767,
-    "train_runtime":
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_runtime": 74.6898,
+    "train_samples_per_second": 0.214,
+    "train_steps_per_second": 0.013
 }
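These JSON files are the usual output of the transformers Trainer metric-saving helpers: train_results.json and eval_results.json hold the per-split metrics, and both are merged into all_results.json. A sketch of the typical calls that produce them, assuming `trainer` is an already-configured transformers.Trainer (not shown in this commit):

# Sketch of the Trainer calls that typically write these metric files.
# `trainer` is assumed to be an existing transformers.Trainer instance.
train_result = trainer.train()
trainer.save_metrics("train", train_result.metrics)  # -> train_results.json, merged into all_results.json
trainer.save_state()                                 # -> trainer_state.json

eval_metrics = trainer.evaluate()
trainer.save_metrics("eval", eval_metrics)           # -> eval_results.json, merged into all_results.json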
eval_results.json
ADDED
@@ -0,0 +1,7 @@
+{
+    "epoch": 0.001813647698934482,
+    "eval_loss": 1.7385717630386353,
+    "eval_runtime": 1254.5458,
+    "eval_samples_per_second": 0.797,
+    "eval_steps_per_second": 0.797
+}
metrics.json
ADDED
@@ -0,0 +1 @@
+{"run_name": "huggyllama/llama-7b_oasst1_l0.0002_4", "train_runtime": 74.6898, "train_samples_per_second": 0.214, "train_steps_per_second": 0.013, "total_flos": 333765997756416.0, "train_loss": 1.5015426874160767, "epoch": 0.001813647698934482, "eval_loss": 1.7385717630386353, "eval_runtime": 1254.5458, "eval_samples_per_second": 0.797, "eval_steps_per_second": 0.797}
train_results.json
CHANGED
@@ -2,7 +2,7 @@
     "epoch": 0.001813647698934482,
     "total_flos": 333765997756416.0,
     "train_loss": 1.5015426874160767,
-    "train_runtime":
-    "train_samples_per_second": 0.
-    "train_steps_per_second": 0.
+    "train_runtime": 74.6898,
+    "train_samples_per_second": 0.214,
+    "train_steps_per_second": 0.013
 }
trainer_state.json
CHANGED
@@ -10,7 +10,7 @@
   "log_history": [
     {
       "epoch": 0.001813647698934482,
-      "grad_norm": 0.
+      "grad_norm": 0.197265625,
       "learning_rate": 0.0002,
       "loss": 1.5015,
       "step": 1
@@ -20,9 +20,9 @@
       "step": 1,
       "total_flos": 333765997756416.0,
       "train_loss": 1.5015426874160767,
-      "train_runtime":
-      "train_samples_per_second": 0.
-      "train_steps_per_second": 0.
+      "train_runtime": 74.6898,
+      "train_samples_per_second": 0.214,
+      "train_steps_per_second": 0.013
     }
   ],
   "logging_steps": 1,
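trainer_state.json also keeps the per-step log_history, which is where the grad_norm, learning_rate, and loss values above come from. A short, runnable sketch for inspecting that history from the saved file:

# Sketch: read the per-step log_history recorded in trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    print(
        f"step={entry.get('step')} "
        f"loss={entry.get('loss')} "
        f"grad_norm={entry.get('grad_norm')}"
    )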
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d77c7466455189fa8d82512831f7f8e39359808a2c6be710f7ae9364ddf125e6
 size 7800
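training_args.bin is another LFS pointer; the underlying file is the pickled TrainingArguments object that the Trainer saves alongside checkpoints. A sketch of inspecting it, assuming a compatible transformers version is installed (weights_only=False is needed on recent PyTorch because the file is a pickled Python object, and the attribute names printed here are standard TrainingArguments fields):

# Sketch: inspect the serialized TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)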