Upload task output test1334test1234test1234test12334
Browse files- README.md +12 -14
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- checkpoint-1/README.md +2 -2
- checkpoint-1/adapter_config.json +10 -5
- checkpoint-1/adapter_model.safetensors +1 -1
- checkpoint-1/chat_template.jinja +5 -0
- checkpoint-1/optimizer.pt +1 -1
- checkpoint-1/rng_state.pth +1 -1
- checkpoint-1/scheduler.pt +1 -1
- checkpoint-1/tokenizer_config.json +0 -1
- checkpoint-1/trainer_state.json +6 -13
- checkpoint-1/training_args.bin +2 -2
README.md
CHANGED
@@ -38,7 +38,7 @@ deepspeed: null
|
|
38 |
early_stopping_patience: null
|
39 |
eval_max_new_tokens: 128
|
40 |
eval_table_size: null
|
41 |
-
evals_per_epoch:
|
42 |
flash_attention: false
|
43 |
fp16: null
|
44 |
fsdp: null
|
@@ -58,7 +58,7 @@ lora_model_dir: null
|
|
58 |
lora_r: 8
|
59 |
lora_target_linear: true
|
60 |
lr_scheduler: cosine
|
61 |
-
max_steps:
|
62 |
micro_batch_size: 2
|
63 |
mlflow_experiment_name: /workspace/axolotl/data/test1334test1234test1234test12334_train_data.json
|
64 |
model_type: AutoModelForCausalLM
|
@@ -69,7 +69,7 @@ pad_to_sequence_len: true
|
|
69 |
resume_from_checkpoint: null
|
70 |
s2_attention: null
|
71 |
sample_packing: false
|
72 |
-
saves_per_epoch:
|
73 |
sequence_len: 512
|
74 |
strict: false
|
75 |
tf32: false
|
@@ -77,7 +77,13 @@ tokenizer_type: AutoTokenizer
|
|
77 |
train_on_inputs: false
|
78 |
trust_remote_code: true
|
79 |
val_set_size: 0.05
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
weight_decay: 0.0
|
82 |
xformers_attention: null
|
83 |
|
@@ -88,8 +94,6 @@ xformers_attention: null
|
|
88 |
# workspace/axolotl/outputs/test1334test1234test1234test12334/texttest
|
89 |
|
90 |
This model was trained from scratch on the None dataset.
|
91 |
-
It achieves the following results on the evaluation set:
|
92 |
-
- Loss: 0.9023
|
93 |
|
94 |
## Model description
|
95 |
|
@@ -116,17 +120,11 @@ The following hyperparameters were used during training:
|
|
116 |
- total_train_batch_size: 8
|
117 |
- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
118 |
- lr_scheduler_type: cosine
|
119 |
-
- lr_scheduler_warmup_steps:
|
120 |
-
- training_steps:
|
121 |
|
122 |
### Training results
|
123 |
|
124 |
-
| Training Loss | Epoch | Step | Validation Loss |
|
125 |
-
|:-------------:|:------:|:----:|:---------------:|
|
126 |
-
| No log | 0 | 0 | 0.9052 |
|
127 |
-
| 1.0164 | 0.0372 | 3 | 0.9057 |
|
128 |
-
| 1.0848 | 0.0743 | 6 | 0.9046 |
|
129 |
-
| 1.0387 | 0.1115 | 9 | 0.9023 |
|
130 |
|
131 |
|
132 |
### Framework versions
|
|
|
38 |
early_stopping_patience: null
|
39 |
eval_max_new_tokens: 128
|
40 |
eval_table_size: null
|
41 |
+
evals_per_epoch: 1
|
42 |
flash_attention: false
|
43 |
fp16: null
|
44 |
fsdp: null
|
|
|
58 |
lora_r: 8
|
59 |
lora_target_linear: true
|
60 |
lr_scheduler: cosine
|
61 |
+
max_steps: 1
|
62 |
micro_batch_size: 2
|
63 |
mlflow_experiment_name: /workspace/axolotl/data/test1334test1234test1234test12334_train_data.json
|
64 |
model_type: AutoModelForCausalLM
|
|
|
69 |
resume_from_checkpoint: null
|
70 |
s2_attention: null
|
71 |
sample_packing: false
|
72 |
+
saves_per_epoch: 1
|
73 |
sequence_len: 512
|
74 |
strict: false
|
75 |
tf32: false
|
|
|
77 |
train_on_inputs: false
|
78 |
trust_remote_code: true
|
79 |
val_set_size: 0.05
|
80 |
+
wandb_entity: null
|
81 |
+
wandb_mode: offline
|
82 |
+
wandb_name: test1334test1234test1234test12334_texttest
|
83 |
+
wandb_project: Gradients-On-Demand
|
84 |
+
wandb_run: your_name
|
85 |
+
wandb_runid: test1334test1234test1234test12334_texttest
|
86 |
+
warmup_steps: 1
|
87 |
weight_decay: 0.0
|
88 |
xformers_attention: null
|
89 |
|
|
|
94 |
# workspace/axolotl/outputs/test1334test1234test1234test12334/texttest
|
95 |
|
96 |
This model was trained from scratch on the None dataset.
|
|
|
|
|
97 |
|
98 |
## Model description
|
99 |
|
|
|
120 |
- total_train_batch_size: 8
|
121 |
- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
122 |
- lr_scheduler_type: cosine
|
123 |
+
- lr_scheduler_warmup_steps: 2
|
124 |
+
- training_steps: 1
|
125 |
|
126 |
### Training results
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
|
130 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -25,12 +25,12 @@
|
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
"q_proj",
|
28 |
-
"
|
29 |
-
"
|
30 |
"k_proj",
|
31 |
"up_proj",
|
32 |
-
"
|
33 |
-
"
|
34 |
],
|
35 |
"task_type": "CAUSAL_LM",
|
36 |
"trainable_token_indices": null,
|
|
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
"q_proj",
|
28 |
+
"gate_proj",
|
29 |
+
"down_proj",
|
30 |
"k_proj",
|
31 |
"up_proj",
|
32 |
+
"o_proj",
|
33 |
+
"v_proj"
|
34 |
],
|
35 |
"task_type": "CAUSAL_LM",
|
36 |
"trainable_token_indices": null,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22573704
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
|
3 |
size 22573704
|
checkpoint-1/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
base_model: samoline
|
3 |
library_name: peft
|
4 |
---
|
5 |
|
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
|
|
199 |
[More Information Needed]
|
200 |
### Framework versions
|
201 |
|
202 |
-
- PEFT 0.
|
|
|
1 |
---
|
2 |
+
base_model: /cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541
|
3 |
library_name: peft
|
4 |
---
|
5 |
|
|
|
199 |
[More Information Needed]
|
200 |
### Framework versions
|
201 |
|
202 |
+
- PEFT 0.15.2
|
checkpoint-1/adapter_config.json
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
-
"base_model_name_or_path": "samoline
|
5 |
"bias": "none",
|
|
|
|
|
|
|
6 |
"fan_in_fan_out": null,
|
7 |
"inference_mode": true,
|
8 |
"init_lora_weights": true,
|
@@ -11,6 +14,7 @@
|
|
11 |
"layers_to_transform": null,
|
12 |
"loftq_config": {},
|
13 |
"lora_alpha": 16,
|
|
|
14 |
"lora_dropout": 0.05,
|
15 |
"megatron_config": null,
|
16 |
"megatron_core": "megatron.core",
|
@@ -20,15 +24,16 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
23 |
"gate_proj",
|
24 |
-
"up_proj",
|
25 |
-
"k_proj",
|
26 |
"down_proj",
|
27 |
-
"
|
|
|
28 |
"o_proj",
|
29 |
-
"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
32 |
"use_dora": false,
|
33 |
"use_rslora": false
|
34 |
}
|
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "/cache/test1334test1234test1234test12334/models/samoline--b7447218-27e6-491c-b3ab-ea03a5b93541",
|
5 |
"bias": "none",
|
6 |
+
"corda_config": null,
|
7 |
+
"eva_config": null,
|
8 |
+
"exclude_modules": null,
|
9 |
"fan_in_fan_out": null,
|
10 |
"inference_mode": true,
|
11 |
"init_lora_weights": true,
|
|
|
14 |
"layers_to_transform": null,
|
15 |
"loftq_config": {},
|
16 |
"lora_alpha": 16,
|
17 |
+
"lora_bias": false,
|
18 |
"lora_dropout": 0.05,
|
19 |
"megatron_config": null,
|
20 |
"megatron_core": "megatron.core",
|
|
|
24 |
"rank_pattern": {},
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
+
"q_proj",
|
28 |
"gate_proj",
|
|
|
|
|
29 |
"down_proj",
|
30 |
+
"k_proj",
|
31 |
+
"up_proj",
|
32 |
"o_proj",
|
33 |
+
"v_proj"
|
34 |
],
|
35 |
"task_type": "CAUSAL_LM",
|
36 |
+
"trainable_token_indices": null,
|
37 |
"use_dora": false,
|
38 |
"use_rslora": false
|
39 |
}
|
checkpoint-1/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22573704
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce2e9c10572d78b4dfbc2a0ae3083c41f64ff61a0e9b50805149deb9ff0315d7
|
3 |
size 22573704
|
checkpoint-1/chat_template.jinja
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>
|
2 |
+
|
3 |
+
'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>
|
4 |
+
|
5 |
+
' }}{% endif %}
|
checkpoint-1/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 11710970
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:876bb53a025a237edcf5100f4fb0c41ac9d1e85c3f646614d528015a4ee9e484
|
3 |
size 11710970
|
checkpoint-1/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f97bb552d304801def1800311e9b4f0db4888641a7d6326f60a0099954973d2b
|
3 |
size 14244
|
checkpoint-1/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe0540ed6e94d693724d0fd61916b0efd9dc6adbac83c0b0a2d35823214a0a5
|
3 |
size 1064
|
checkpoint-1/tokenizer_config.json
CHANGED
@@ -2051,7 +2051,6 @@
|
|
2051 |
}
|
2052 |
},
|
2053 |
"bos_token": "<|begin_of_text|>",
|
2054 |
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
2055 |
"clean_up_tokenization_spaces": true,
|
2056 |
"eos_token": "<|end_of_text|>",
|
2057 |
"extra_special_tokens": {},
|
|
|
2051 |
}
|
2052 |
},
|
2053 |
"bos_token": "<|begin_of_text|>",
|
|
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
"extra_special_tokens": {},
|
checkpoint-1/trainer_state.json
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 1,
|
7 |
"is_hyper_param_search": false,
|
@@ -9,18 +10,10 @@
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch":
|
13 |
-
"grad_norm": 0.
|
14 |
"learning_rate": 0.0,
|
15 |
-
"loss": 0.
|
16 |
-
"step": 1
|
17 |
-
},
|
18 |
-
{
|
19 |
-
"epoch": 0.008421052631578947,
|
20 |
-
"eval_loss": 0.8261134624481201,
|
21 |
-
"eval_runtime": 1.1415,
|
22 |
-
"eval_samples_per_second": 43.801,
|
23 |
-
"eval_steps_per_second": 21.901,
|
24 |
"step": 1
|
25 |
}
|
26 |
],
|
@@ -41,7 +34,7 @@
|
|
41 |
"attributes": {}
|
42 |
}
|
43 |
},
|
44 |
-
"total_flos":
|
45 |
"train_batch_size": 2,
|
46 |
"trial_name": null,
|
47 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 8.17026839331672e-05,
|
6 |
"eval_steps": 500,
|
7 |
"global_step": 1,
|
8 |
"is_hyper_param_search": false,
|
|
|
10 |
"is_world_process_zero": true,
|
11 |
"log_history": [
|
12 |
{
|
13 |
+
"epoch": 8.17026839331672e-05,
|
14 |
+
"grad_norm": 0.5843780636787415,
|
15 |
"learning_rate": 0.0,
|
16 |
+
"loss": 0.925,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
"step": 1
|
18 |
}
|
19 |
],
|
|
|
34 |
"attributes": {}
|
35 |
}
|
36 |
},
|
37 |
+
"total_flos": 24054551543808.0,
|
38 |
"train_batch_size": 2,
|
39 |
"trial_name": null,
|
40 |
"trial_params": null
|
checkpoint-1/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:169fcda599c2acc655ce38e05e7603c80e5959ce0cd31227facd2f596cd183b3
|
3 |
+
size 7224
|