SystemAdmin123
commited on
Training in progress, step 40
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 00000000-0000-0000-0000-000000000000.yml +48 -0
- 00000000-0000-0000-0000-000000000001.yml +48 -0
- 00000000-0000-0000-0000-000000000123.yml +50 -0
- 0017f0db-d7f8-4f78-9dd4-effb388c566e.yml +50 -0
- 01b60291-41f3-4631-b7e8-f7c60c2ca163.yml +53 -0
- 03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml +52 -0
- 0573e27c-6a6f-4989-9b30-ba29f333396d.yml +50 -0
- 077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml +51 -0
- 078ba83d-394d-4561-8dc4-25f7f5d342f4.yml +50 -0
- 08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml +51 -0
- 09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml +50 -0
- 102906fb-e0c6-458b-91ed-ea75a39fdc12.yml +50 -0
- 15b64bb1-e5f8-4728-878b-8b09852c0c75.yml +51 -0
- 19c8688e-ea72-45f4-ad76-056d1e3fe378.yml +53 -0
- 1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml +51 -0
- 1dc178e8-8f66-48ae-8ebb-825428c168d0.yml +53 -0
- 1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml +50 -0
- 1ead3944-f2ff-490c-8e71-0e1cf0736354.yml +52 -0
- 21315ae5-16ee-43cd-9612-743524060933.yml +50 -0
- 2135dd61-bfbc-4980-8694-eb2672292c86.yml +50 -0
- 2311c30b-5869-4555-8890-0c7c01a7e6d8.yml +50 -0
- 233e1171-06fa-47f5-a61c-f0a283fd0346.yml +50 -0
- 2593284e-8d2b-49a4-9d90-a5407a2dda74.yml +51 -0
- 260bfc16-ba7e-4e28-99db-8a240db68244.yml +50 -0
- 2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml +51 -0
- 284c3982-7bc3-4e42-a78c-849f03798c5f.yml +51 -0
- 29594d3f-2fa3-437a-80c0-be13835d5ddd.yml +53 -0
- 29e89a2c-6136-48b6-88bc-a0066652be7d.yml +52 -0
- 2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml +50 -0
- 2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml +53 -0
- 2db36e53-9fad-4e67-b969-b3cc09303391.yml +53 -0
- 2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml +51 -0
- 2fe0c844-e98c-476d-b9a0-1a41beb91022.yml +50 -0
- 30e923d1-5b61-4748-ad75-b5645e8a66f9.yml +52 -0
- 356b1304-361f-49fb-bc60-43e6188679c1.yml +51 -0
- 36a43860-f8fa-4c32-afb5-c665be741dc4.yml +52 -0
- 3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml +51 -0
- 393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml +50 -0
- 39a09fca-d7db-49b6-9d31-760f252e1a05.yml +50 -0
- 3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml +51 -0
- 3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml +53 -0
- 3d940075-c928-42d9-8a10-07211af2fe5d.yml +50 -0
- 40f27435-f59d-488f-b2d6-01e356d79c48.yml +50 -0
- 419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml +50 -0
- 424a19bd-9a6f-4171-a205-745ee0bc1a03.yml +50 -0
- 427d02be-6008-4556-9a5e-9c7cb7503058.yml +51 -0
- 428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml +50 -0
- 44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml +53 -0
- 4524a254-d619-44de-addb-80d1a56f0224.yml +52 -0
- 4692c3b1-0351-4533-948d-ace8c76ceb1f.yml +50 -0
00000000-0000-0000-0000-000000000000.yml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: peft-internal-testing/tiny-dummy-qwen2
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- format: custom
|
7 |
+
path: argilla/databricks-dolly-15k-curated-en
|
8 |
+
type:
|
9 |
+
field_input: original-instruction
|
10 |
+
field_instruction: original-instruction
|
11 |
+
field_output: original-response
|
12 |
+
format: '{instruction} {input}'
|
13 |
+
no_input_format: '{instruction}'
|
14 |
+
system_format: '{system}'
|
15 |
+
system_prompt: ''
|
16 |
+
eval_steps: 20
|
17 |
+
flash_attention: true
|
18 |
+
gpu_memory_limit: 80GiB
|
19 |
+
gradient_checkpointing: true
|
20 |
+
group_by_length: true
|
21 |
+
hub_model_id: SystemAdmin123/test-repo
|
22 |
+
hub_strategy: checkpoint
|
23 |
+
learning_rate: 0.0002
|
24 |
+
logging_steps: 10
|
25 |
+
lr_scheduler: cosine
|
26 |
+
micro_batch_size: 19
|
27 |
+
model_type: AutoModelForCausalLM
|
28 |
+
num_epochs: 10
|
29 |
+
optimizer: adamw_bnb_8bit
|
30 |
+
output_dir: /workspace/axolotl/configs
|
31 |
+
pad_to_sequence_len: true
|
32 |
+
resize_token_embeddings_to_32x: false
|
33 |
+
sample_packing: false
|
34 |
+
save_steps: 40
|
35 |
+
save_total_limit: 1
|
36 |
+
sequence_len: 2048
|
37 |
+
tokenizer_type: Qwen2TokenizerFast
|
38 |
+
train_on_inputs: false
|
39 |
+
trust_remote_code: true
|
40 |
+
val_set_size: 0.1
|
41 |
+
wandb_entity: ''
|
42 |
+
wandb_mode: online
|
43 |
+
wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
|
44 |
+
wandb_project: Gradients-On-Demand
|
45 |
+
wandb_run: your_name
|
46 |
+
wandb_runid: default
|
47 |
+
warmup_ratio: 0.05
|
48 |
+
xformers_attention: true
|
00000000-0000-0000-0000-000000000001.yml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2-7B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- format: custom
|
7 |
+
path: argilla/databricks-dolly-15k-curated-en
|
8 |
+
type:
|
9 |
+
field_input: original-instruction
|
10 |
+
field_instruction: original-instruction
|
11 |
+
field_output: original-response
|
12 |
+
format: '{instruction} {input}'
|
13 |
+
no_input_format: '{instruction}'
|
14 |
+
system_format: '{system}'
|
15 |
+
system_prompt: ''
|
16 |
+
eval_steps: 20
|
17 |
+
flash_attention: true
|
18 |
+
gpu_memory_limit: 80GiB
|
19 |
+
gradient_checkpointing: true
|
20 |
+
group_by_length: true
|
21 |
+
hub_model_id: SystemAdmin123/test-repo
|
22 |
+
hub_strategy: checkpoint
|
23 |
+
learning_rate: 0.0002
|
24 |
+
logging_steps: 10
|
25 |
+
lr_scheduler: cosine
|
26 |
+
max_steps: 2500
|
27 |
+
micro_batch_size: 4
|
28 |
+
model_type: AutoModelForCausalLM
|
29 |
+
optimizer: adamw_bnb_8bit
|
30 |
+
output_dir: /workspace/axolotl/configs
|
31 |
+
pad_to_sequence_len: true
|
32 |
+
resize_token_embeddings_to_32x: false
|
33 |
+
sample_packing: false
|
34 |
+
save_steps: 40
|
35 |
+
save_total_limit: 1
|
36 |
+
sequence_len: 2048
|
37 |
+
tokenizer_type: Qwen2TokenizerFast
|
38 |
+
train_on_inputs: false
|
39 |
+
trust_remote_code: true
|
40 |
+
val_set_size: 0.1
|
41 |
+
wandb_entity: ''
|
42 |
+
wandb_mode: online
|
43 |
+
wandb_name: Qwen/Qwen2-7B-Instruct-argilla/databricks-dolly-15k-curated-en
|
44 |
+
wandb_project: Gradients-On-Demand
|
45 |
+
wandb_run: your_name
|
46 |
+
wandb_runid: default
|
47 |
+
warmup_ratio: 0.05
|
48 |
+
xformers_attention: true
|
00000000-0000-0000-0000-000000000123.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: fxmarty/small-llama-testing
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- format: custom
|
7 |
+
path: argilla/databricks-dolly-15k-curated-en
|
8 |
+
type:
|
9 |
+
field_input: original-instruction
|
10 |
+
field_instruction: original-instruction
|
11 |
+
field_output: original-response
|
12 |
+
format: '{instruction} {input}'
|
13 |
+
no_input_format: '{instruction}'
|
14 |
+
system_format: '{system}'
|
15 |
+
system_prompt: ''
|
16 |
+
eval_steps: 20
|
17 |
+
flash_attention: true
|
18 |
+
gpu_memory_limit: 80GiB
|
19 |
+
gradient_checkpointing: true
|
20 |
+
group_by_length: true
|
21 |
+
hub_model_id: SystemAdmin123/test-repo
|
22 |
+
hub_strategy: checkpoint
|
23 |
+
learning_rate: 0.0002
|
24 |
+
logging_steps: 10
|
25 |
+
lr_scheduler: cosine
|
26 |
+
micro_batch_size: 19
|
27 |
+
model_type: AutoModelForCausalLM
|
28 |
+
num_epochs: 10
|
29 |
+
optimizer: adamw_bnb_8bit
|
30 |
+
output_dir: /workspace/axolotl/configs
|
31 |
+
pad_to_sequence_len: true
|
32 |
+
resize_token_embeddings_to_32x: false
|
33 |
+
sample_packing: false
|
34 |
+
save_steps: 40
|
35 |
+
save_total_limit: 1
|
36 |
+
sequence_len: 2048
|
37 |
+
special_tokens:
|
38 |
+
pad_token: </s>
|
39 |
+
tokenizer_type: LlamaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: fxmarty/small-llama-testing-argilla/databricks-dolly-15k-curated-en
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
0017f0db-d7f8-4f78-9dd4-effb388c566e.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2.5-Math-7B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- e379af78ad5f53fa_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/e379af78ad5f53fa_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: SeedRule
|
13 |
+
field_output: prompt
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/1b230251-8147-4d9a-b940-904d5e878a9d
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 2
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/e379af78ad5f53fa_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
01b60291-41f3-4631-b7e8-f7c60c2ca163.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: huggyllama/llama-7b
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- ccd32583f980ebf0_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/ccd32583f980ebf0_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: ''
|
13 |
+
field_instruction: problem
|
14 |
+
field_output: solution
|
15 |
+
format: '{instruction}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/fa36bf4c-34a6-4e51-ae14-a8372bf92b39
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: </s>
|
42 |
+
tokenizer_type: LlamaTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: huggyllama/llama-7b-/workspace/input_data/ccd32583f980ebf0_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: fxmarty/tiny-llama-fast-tokenizer
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- fc6136aac03f618a_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/fc6136aac03f618a_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: text
|
13 |
+
field_output: title
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: </s>
|
41 |
+
tokenizer_type: LlamaTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
0573e27c-6a6f-4989-9b30-ba29f333396d.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: dltjdgh0928/test_instruction
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 76022a30315552b8_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/76022a30315552b8_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: input
|
13 |
+
field_output: target
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/c32e2a6e-d46b-44d8-953f-917301378acd
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: LlamaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: dltjdgh0928/test_instruction-/workspace/input_data/76022a30315552b8_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/mistral-7b-v0.3
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- ca0152973425c947_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/ca0152973425c947_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: code
|
13 |
+
field_instruction: func_name
|
14 |
+
field_output: docstring
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: LlamaTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
078ba83d-394d-4561-8dc4-25f7f5d342f4.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: numind/NuExtract-v1.5
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- b23f426a106701fe_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/b23f426a106701fe_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: question
|
13 |
+
field_output: answer
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/53cbcb68-02e6-4a0d-ae57-14c9018f7e1d
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: LlamaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: numind/NuExtract-v1.5-/workspace/input_data/b23f426a106701fe_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: facebook/opt-350m
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 5502b08b9b3b41f5_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/5502b08b9b3b41f5_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: function
|
13 |
+
field_instruction: cwe_description
|
14 |
+
field_output: cve_description
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/ea3910f1-2762-4e77-bf49-50261890a7c3
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 19
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: GPT2TokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: facebook/opt-350m-/tmp/5502b08b9b3b41f5_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Qwen2-0.5B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 86ba74df94452c98_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/86ba74df94452c98_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: instruction
|
13 |
+
field_output: output
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/6f0d89e4-45b1-4dc8-877b-bec29de112de
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Qwen2-0.5B-Instruct-/workspace/input_data/86ba74df94452c98_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
102906fb-e0c6-458b-91ed-ea75a39fdc12.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Phi-3.5-mini-instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 286ecee6d914760d_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/286ecee6d914760d_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: ja
|
13 |
+
field_output: en
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/df962b68-cd57-4f76-a085-6da16b65c260
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 19
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: LlamaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Phi-3.5-mini-instruct-/tmp/286ecee6d914760d_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
15b64bb1-e5f8-4728-878b-8b09852c0c75.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Eurdem/Defne_llama3_2x8B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 235dcd465bd8663e_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/235dcd465bd8663e_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: input
|
13 |
+
field_instruction: instruction
|
14 |
+
field_output: output
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/4865f614-7ca8-416c-a631-a000a6af7a20
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 2
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: PreTrainedTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: Eurdem/Defne_llama3_2x8B-/tmp/235dcd465bd8663e_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
19c8688e-ea72-45f4-ad76-056d1e3fe378.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: codellama/CodeLlama-7b-Instruct-hf
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 0af14c27ef012868_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/0af14c27ef012868_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: text
|
13 |
+
field_instruction: subject
|
14 |
+
field_output: title
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/69c5f55b-dddc-4b99-a936-751161256f95
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: </s>
|
42 |
+
tokenizer_type: CodeLlamaTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: codellama/CodeLlama-7b-Instruct-hf-/workspace/input_data/0af14c27ef012868_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: jhflow/mistral7b-lora-multi-turn-v2
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 77a0df1c69074e81_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/77a0df1c69074e81_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: keyword
|
13 |
+
field_instruction: abstract
|
14 |
+
field_output: title
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/0c660dfd-6e21-4c2b-9403-0c4b6ad1bf52
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 2
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: LlamaTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: jhflow/mistral7b-lora-multi-turn-v2-/tmp/77a0df1c69074e81_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
1dc178e8-8f66-48ae-8ebb-825428c168d0.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Yarn-Mistral-7b-64k
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- bccab6bcbcb6fc03_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/bccab6bcbcb6fc03_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: choices
|
13 |
+
field_instruction: full_prompt
|
14 |
+
field_output: example
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/93085afc-6d0b-49ca-ac4a-839ea57462a9
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: </s>
|
42 |
+
tokenizer_type: LlamaTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: NousResearch/Yarn-Mistral-7b-64k-/workspace/input_data/bccab6bcbcb6fc03_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: fxmarty/tiny-dummy-qwen2
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- c0a032ebb939dd62_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/c0a032ebb939dd62_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: chosen
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/da53a378-c2cd-4060-af7d-31ee77ad94d8
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 19
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: fxmarty/tiny-dummy-qwen2-/tmp/c0a032ebb939dd62_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
1ead3944-f2ff-490c-8e71-0e1cf0736354.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Meta-Llama-3-8B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 079da9e23dfe7fea_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/079da9e23dfe7fea_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: filename
|
13 |
+
field_output: title
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/3cb570bf-4ffe-411e-86e0-ad8b9bf19f1b
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: <|end_of_text|>
|
41 |
+
tokenizer_type: PreTrainedTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: NousResearch/Meta-Llama-3-8B-/workspace/input_data/079da9e23dfe7fea_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
21315ae5-16ee-43cd-9612-743524060933.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Meta-Llama-3.1-8B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 562fa3aeea07046a_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/562fa3aeea07046a_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: text
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: PreTrainedTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
2135dd61-bfbc-4980-8694-eb2672292c86.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: peft-internal-testing/tiny-dummy-qwen2
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 7041e6a7c6976935_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/7041e6a7c6976935_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: Sequence
|
13 |
+
field_output: Secondary_structure
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/aa07d7d0-0e68-4d05-a943-be6d5a82bcf5
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 19
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: peft-internal-testing/tiny-dummy-qwen2-/tmp/7041e6a7c6976935_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
2311c30b-5869-4555-8890-0c7c01a7e6d8.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2.5-Math-7B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 63970f9aa6060c87_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/63970f9aa6060c87_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: question
|
13 |
+
field_output: answer
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/e48fbbf8-54f7-401b-bc8f-38d51942e140
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/workspace/input_data/63970f9aa6060c87_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
233e1171-06fa-47f5-a61c-f0a283fd0346.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/codegemma-2b
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 5c38c9685f2d92d6_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/5c38c9685f2d92d6_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: problem
|
13 |
+
field_output: solution
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/5c2b97fc-05d6-480b-a036-352bd82cc8ed
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 9
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: GemmaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/codegemma-2b-/tmp/5c38c9685f2d92d6_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
2593284e-8d2b-49a4-9d90-a5407a2dda74.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: lmsys/vicuna-7b-v1.3
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 035ee0afc5220cd9_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/035ee0afc5220cd9_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: algo_name
|
13 |
+
field_instruction: question
|
14 |
+
field_output: answer
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/e3a35688-e9b7-46d1-8f93-172a66f78e04
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 2
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: LlamaTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: lmsys/vicuna-7b-v1.3-/tmp/035ee0afc5220cd9_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
260bfc16-ba7e-4e28-99db-8a240db68244.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/codegemma-7b
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- c42c79a730cf3f73_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/c42c79a730cf3f73_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: chosen
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/4ef7ad27-7fca-469a-97eb-ef5effb6d888
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 2
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: GemmaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/codegemma-7b-/tmp/c42c79a730cf3f73_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: bigscience/bloomz-560m
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- a6a22929b7211ec8_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/a6a22929b7211ec8_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: privacy_mask
|
13 |
+
field_instruction: masked_text
|
14 |
+
field_output: unmasked_text
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/275c513f-c7ea-4c16-b6eb-7e9453a6a0ac
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 19
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: BloomTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: bigscience/bloomz-560m-/tmp/a6a22929b7211ec8_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
284c3982-7bc3-4e42-a78c-849f03798c5f.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Qwen2-7B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 3e306f9221b79797_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/3e306f9221b79797_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: dialogue
|
13 |
+
field_instruction: rendered_input
|
14 |
+
field_output: summary
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/f3c9c6cc-5806-45b4-aab9-d03de6022b3a
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: Qwen2TokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: unsloth/Qwen2-7B-Instruct-/workspace/input_data/3e306f9221b79797_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
29594d3f-2fa3-437a-80c0-be13835d5ddd.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- a720a9d4bd31efdf_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/a720a9d4bd31efdf_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: context
|
13 |
+
field_instruction: question
|
14 |
+
field_output: final_decision
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/71efd339-65b7-418d-8796-5d4030d63c4a
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 19
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: <|end_of_text|>
|
42 |
+
tokenizer_type: PreTrainedTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762-/tmp/a720a9d4bd31efdf_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
29e89a2c-6136-48b6-88bc-a0066652be7d.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: JackFram/llama-68m
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- ff3a521d02fa72b2_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/ff3a521d02fa72b2_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: context
|
13 |
+
field_output: question
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/4ada8092-cc1e-445c-9260-a580ef2586ae
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: </s>
|
41 |
+
tokenizer_type: LlamaTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: JackFram/llama-68m-/workspace/input_data/ff3a521d02fa72b2_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2-0.5B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 4c9e3de49f6146d7_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/4c9e3de49f6146d7_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: instruction
|
13 |
+
field_output: solution
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/92a2f682-b071-4b7f-b1ad-9d92ad248d58
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: Qwen/Qwen2-0.5B-Instruct-/workspace/input_data/4c9e3de49f6146d7_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Llama-3.2-1B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- f51beb4c568b9128_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/f51beb4c568b9128_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: keywords
|
13 |
+
field_instruction: idea
|
14 |
+
field_output: full_response
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/0c2649cc-2fe7-4e88-b672-6da1fee4001f
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: <|end_of_text|>
|
42 |
+
tokenizer_type: PreTrainedTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/f51beb4c568b9128_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
2db36e53-9fad-4e67-b969-b3cc09303391.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Llama-3.2-1B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- bb5c3bd8ee309eb0_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/bb5c3bd8ee309eb0_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: system_prompt
|
13 |
+
field_instruction: question
|
14 |
+
field_output: response
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/60e7a811-5939-479a-aa70-b97de53f693d
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 19
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: <|end_of_text|>
|
42 |
+
tokenizer_type: PreTrainedTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: NousResearch/Llama-3.2-1B-/tmp/bb5c3bd8ee309eb0_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: katuni4ka/tiny-random-qwen1.5-moe
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 95544452e61c7393_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/95544452e61c7393_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: input
|
13 |
+
field_instruction: instruction
|
14 |
+
field_output: output
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/e61e89f0-854a-4922-8d25-dae435e91af0
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: Qwen2TokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: katuni4ka/tiny-random-qwen1.5-moe-/workspace/input_data/95544452e61c7393_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
2fe0c844-e98c-476d-b9a0-1a41beb91022.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Qwen2.5-Math-1.5B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- d16d347b651ede3e_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/d16d347b651ede3e_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: aspect_list
|
13 |
+
field_output: caption_summary
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/d2eafa75-a7f1-408a-a817-38be914edc2d
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Qwen2.5-Math-1.5B-Instruct-/workspace/input_data/d16d347b651ede3e_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
30e923d1-5b61-4748-ad75-b5645e8a66f9.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 8cea1b501202bc61_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/8cea1b501202bc61_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: principle
|
13 |
+
field_output: instruction
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/4797a41f-d00c-44df-82e0-f23102492c0b
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 2
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: <|eot_id|>
|
41 |
+
tokenizer_type: PreTrainedTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct-/tmp/8cea1b501202bc61_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
356b1304-361f-49fb-bc60-43e6188679c1.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: scb10x/llama-3-typhoon-v1.5-8b-instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 3d40e8d1c58c211e_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/3d40e8d1c58c211e_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: abstract
|
13 |
+
field_instruction: title
|
14 |
+
field_output: target
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/105663aa-ca91-474c-9030-957c22705518
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: PreTrainedTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: scb10x/llama-3-typhoon-v1.5-8b-instruct-/tmp/3d40e8d1c58c211e_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
36a43860-f8fa-4c32-afb5-c665be741dc4.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Yarn-Solar-10b-32k
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 7bb5c8c129066fca_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/7bb5c8c129066fca_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: chosen
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/d02ea0f4-0dee-4d3c-9f12-743ce16384f1
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 1
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: </s>
|
41 |
+
tokenizer_type: LlamaTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: NousResearch/Yarn-Solar-10b-32k-/tmp/7bb5c8c129066fca_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: furiosa-ai/mlperf-gpt-j-6b
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 80b3f2b5f3ce3209_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/80b3f2b5f3ce3209_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: headline_a
|
13 |
+
field_instruction: rendered_input
|
14 |
+
field_output: headline_b
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/5e183a09-3809-4b42-97f6-0e567d3b687b
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 3
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: GPT2TokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: furiosa-ai/mlperf-gpt-j-6b-/tmp/80b3f2b5f3ce3209_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/SmolLM-360M
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- a84f26482cb377ef_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/a84f26482cb377ef_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: related_work
|
13 |
+
field_output: abstract
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/74db94dc-a2aa-4bd1-842c-ee6a65969df2
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: GPT2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/SmolLM-360M-/workspace/input_data/a84f26482cb377ef_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
39a09fca-d7db-49b6-9d31-760f252e1a05.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- ae60825aae6334e4_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/ae60825aae6334e4_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: instruction
|
13 |
+
field_output: output
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/8e5e035b-ad91-4c67-a2c7-1a524b7cc337
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 2
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: GemmaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2-/tmp/ae60825aae6334e4_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2.5-1.5B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- f14707e620deedc0_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/f14707e620deedc0_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: problem
|
13 |
+
field_instruction: prompt
|
14 |
+
field_output: solution
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/1d2ad8c0-863e-4389-8dcc-4c05af9b0cfd
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 3
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: Qwen2TokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/tmp/f14707e620deedc0_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Meta-Llama-3-8B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 932456af08c12528_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/932456af08c12528_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: genres
|
13 |
+
field_instruction: primaryTitle
|
14 |
+
field_output: text
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: SystemAdmin123/66a854ff-b78c-45c7-abb2-367e9ad0a406
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
micro_batch_size: 2
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
num_epochs: 10
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: <|end_of_text|>
|
42 |
+
tokenizer_type: PreTrainedTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: NousResearch/Meta-Llama-3-8B-/tmp/932456af08c12528_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
3d940075-c928-42d9-8a10-07211af2fe5d.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/codegemma-7b-it
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- a4ca6efb0404ddfb_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/a4ca6efb0404ddfb_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: references
|
13 |
+
field_output: prompt
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/0c0e5d3f-1349-46d3-9234-4485480fad62
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: GemmaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/codegemma-7b-it-/workspace/input_data/a4ca6efb0404ddfb_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
40f27435-f59d-488f-b2d6-01e356d79c48.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2-1.5B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- df925134bb2c32b8_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/df925134bb2c32b8_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: amoral
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
max_steps: 2500
|
29 |
+
micro_batch_size: 4
|
30 |
+
model_type: AutoModelForCausalLM
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: Qwen/Qwen2.5-Math-7B-Instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- f58008447a01a2e1_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/f58008447a01a2e1_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: question
|
13 |
+
field_output: answer
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/0b0d8bdd-1df1-4e38-93cb-eb2f50e3e64a
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 2
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/f58008447a01a2e1_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
424a19bd-9a6f-4171-a205-745ee0bc1a03.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Mistral-Nemo-Instruct-2407
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 2ad9a6f08f511a85_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/2ad9a6f08f511a85_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: question
|
13 |
+
field_output: reponses
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/2ba978bd-1973-4dbe-93d9-13ea0d52bfde
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 19
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: PreTrainedTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Mistral-Nemo-Instruct-2407-/tmp/2ad9a6f08f511a85_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
427d02be-6008-4556-9a5e-9c7cb7503058.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Phi-3.5-mini-instruct
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 7e5b54272524b996_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/7e5b54272524b996_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: input
|
13 |
+
field_instruction: instruction
|
14 |
+
field_output: output
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
tokenizer_type: LlamaTokenizerFast
|
41 |
+
train_on_inputs: false
|
42 |
+
trust_remote_code: true
|
43 |
+
val_set_size: 0.1
|
44 |
+
wandb_entity: ''
|
45 |
+
wandb_mode: online
|
46 |
+
wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
|
47 |
+
wandb_project: Gradients-On-Demand
|
48 |
+
wandb_run: your_name
|
49 |
+
wandb_runid: default
|
50 |
+
warmup_ratio: 0.05
|
51 |
+
xformers_attention: true
|
428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: dltjdgh0928/test_instruction
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 7e1c4c23d4464ec4_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/7e1c4c23d4464ec4_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: line
|
13 |
+
field_output: values
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/f921eed7-e3a9-4535-800a-6dca03a54aaa
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 19
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: LlamaTokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: dltjdgh0928/test_instruction-/tmp/7e1c4c23d4464ec4_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|
44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: NousResearch/Llama-3.2-1B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- 05366f42f2c8edcd_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/05366f42f2c8edcd_train_data.json
|
11 |
+
type:
|
12 |
+
field_input: schema
|
13 |
+
field_instruction: question
|
14 |
+
field_output: output
|
15 |
+
format: '{instruction} {input}'
|
16 |
+
no_input_format: '{instruction}'
|
17 |
+
system_format: '{system}'
|
18 |
+
system_prompt: ''
|
19 |
+
eval_steps: 20
|
20 |
+
flash_attention: true
|
21 |
+
gpu_memory_limit: 80GiB
|
22 |
+
gradient_checkpointing: true
|
23 |
+
group_by_length: true
|
24 |
+
hub_model_id: willtensora/5a684b11-35f0-45eb-8ff7-2641074d853f
|
25 |
+
hub_strategy: checkpoint
|
26 |
+
learning_rate: 0.0002
|
27 |
+
logging_steps: 10
|
28 |
+
lr_scheduler: cosine
|
29 |
+
max_steps: 2500
|
30 |
+
micro_batch_size: 4
|
31 |
+
model_type: AutoModelForCausalLM
|
32 |
+
optimizer: adamw_bnb_8bit
|
33 |
+
output_dir: /workspace/axolotl/configs
|
34 |
+
pad_to_sequence_len: true
|
35 |
+
resize_token_embeddings_to_32x: false
|
36 |
+
sample_packing: false
|
37 |
+
save_steps: 40
|
38 |
+
save_total_limit: 1
|
39 |
+
sequence_len: 2048
|
40 |
+
special_tokens:
|
41 |
+
pad_token: <|end_of_text|>
|
42 |
+
tokenizer_type: PreTrainedTokenizerFast
|
43 |
+
train_on_inputs: false
|
44 |
+
trust_remote_code: true
|
45 |
+
val_set_size: 0.1
|
46 |
+
wandb_entity: ''
|
47 |
+
wandb_mode: online
|
48 |
+
wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/05366f42f2c8edcd_train_data.json
|
49 |
+
wandb_project: Gradients-On-Demand
|
50 |
+
wandb_run: your_name
|
51 |
+
wandb_runid: default
|
52 |
+
warmup_ratio: 0.05
|
53 |
+
xformers_attention: true
|
4524a254-d619-44de-addb-80d1a56f0224.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: katuni4ka/tiny-random-falcon-40b
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- b30c17b6a3700eb0_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/b30c17b6a3700eb0_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: hieroglyphs
|
13 |
+
field_output: translation
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/f64de308-5d39-4088-bb11-f9b890e56369
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 0
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
special_tokens:
|
40 |
+
pad_token: <|endoftext|>
|
41 |
+
tokenizer_type: PreTrainedTokenizerFast
|
42 |
+
train_on_inputs: false
|
43 |
+
trust_remote_code: true
|
44 |
+
val_set_size: 0.1
|
45 |
+
wandb_entity: ''
|
46 |
+
wandb_mode: online
|
47 |
+
wandb_name: katuni4ka/tiny-random-falcon-40b-/tmp/b30c17b6a3700eb0_train_data.json
|
48 |
+
wandb_project: Gradients-On-Demand
|
49 |
+
wandb_run: your_name
|
50 |
+
wandb_runid: default
|
51 |
+
warmup_ratio: 0.05
|
52 |
+
xformers_attention: true
|
4692c3b1-0351-4533-948d-ace8c76ceb1f.yml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: unsloth/Qwen2.5-0.5B
|
2 |
+
batch_size: 32
|
3 |
+
bf16: true
|
4 |
+
chat_template: tokenizer_default_fallback_alpaca
|
5 |
+
datasets:
|
6 |
+
- data_files:
|
7 |
+
- e79aa413a56fb417_train_data.json
|
8 |
+
ds_type: json
|
9 |
+
format: custom
|
10 |
+
path: /workspace/input_data/e79aa413a56fb417_train_data.json
|
11 |
+
type:
|
12 |
+
field_instruction: prompt
|
13 |
+
field_output: chosen
|
14 |
+
format: '{instruction}'
|
15 |
+
no_input_format: '{instruction}'
|
16 |
+
system_format: '{system}'
|
17 |
+
system_prompt: ''
|
18 |
+
eval_steps: 20
|
19 |
+
flash_attention: true
|
20 |
+
gpu_memory_limit: 80GiB
|
21 |
+
gradient_checkpointing: true
|
22 |
+
group_by_length: true
|
23 |
+
hub_model_id: SystemAdmin123/c9c706d7-4304-4009-a2b2-f6bd643f09f1
|
24 |
+
hub_strategy: checkpoint
|
25 |
+
learning_rate: 0.0002
|
26 |
+
logging_steps: 10
|
27 |
+
lr_scheduler: cosine
|
28 |
+
micro_batch_size: 3
|
29 |
+
model_type: AutoModelForCausalLM
|
30 |
+
num_epochs: 10
|
31 |
+
optimizer: adamw_bnb_8bit
|
32 |
+
output_dir: /workspace/axolotl/configs
|
33 |
+
pad_to_sequence_len: true
|
34 |
+
resize_token_embeddings_to_32x: false
|
35 |
+
sample_packing: false
|
36 |
+
save_steps: 40
|
37 |
+
save_total_limit: 1
|
38 |
+
sequence_len: 2048
|
39 |
+
tokenizer_type: Qwen2TokenizerFast
|
40 |
+
train_on_inputs: false
|
41 |
+
trust_remote_code: true
|
42 |
+
val_set_size: 0.1
|
43 |
+
wandb_entity: ''
|
44 |
+
wandb_mode: online
|
45 |
+
wandb_name: unsloth/Qwen2.5-0.5B-/tmp/e79aa413a56fb417_train_data.json
|
46 |
+
wandb_project: Gradients-On-Demand
|
47 |
+
wandb_run: your_name
|
48 |
+
wandb_runid: default
|
49 |
+
warmup_ratio: 0.05
|
50 |
+
xformers_attention: true
|