SystemAdmin123 commited on
Commit
e876591
·
verified ·
1 Parent(s): 36f10f5

Training in progress, step 40

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 00000000-0000-0000-0000-000000000000.yml +48 -0
  2. 00000000-0000-0000-0000-000000000001.yml +48 -0
  3. 00000000-0000-0000-0000-000000000123.yml +50 -0
  4. 0017f0db-d7f8-4f78-9dd4-effb388c566e.yml +50 -0
  5. 01b60291-41f3-4631-b7e8-f7c60c2ca163.yml +53 -0
  6. 03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml +52 -0
  7. 0573e27c-6a6f-4989-9b30-ba29f333396d.yml +50 -0
  8. 077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml +51 -0
  9. 078ba83d-394d-4561-8dc4-25f7f5d342f4.yml +50 -0
  10. 08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml +51 -0
  11. 09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml +50 -0
  12. 102906fb-e0c6-458b-91ed-ea75a39fdc12.yml +50 -0
  13. 15b64bb1-e5f8-4728-878b-8b09852c0c75.yml +51 -0
  14. 19c8688e-ea72-45f4-ad76-056d1e3fe378.yml +53 -0
  15. 1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml +51 -0
  16. 1dc178e8-8f66-48ae-8ebb-825428c168d0.yml +53 -0
  17. 1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml +50 -0
  18. 1ead3944-f2ff-490c-8e71-0e1cf0736354.yml +52 -0
  19. 21315ae5-16ee-43cd-9612-743524060933.yml +50 -0
  20. 2135dd61-bfbc-4980-8694-eb2672292c86.yml +50 -0
  21. 2311c30b-5869-4555-8890-0c7c01a7e6d8.yml +50 -0
  22. 233e1171-06fa-47f5-a61c-f0a283fd0346.yml +50 -0
  23. 2593284e-8d2b-49a4-9d90-a5407a2dda74.yml +51 -0
  24. 260bfc16-ba7e-4e28-99db-8a240db68244.yml +50 -0
  25. 2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml +51 -0
  26. 284c3982-7bc3-4e42-a78c-849f03798c5f.yml +51 -0
  27. 29594d3f-2fa3-437a-80c0-be13835d5ddd.yml +53 -0
  28. 29e89a2c-6136-48b6-88bc-a0066652be7d.yml +52 -0
  29. 2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml +50 -0
  30. 2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml +53 -0
  31. 2db36e53-9fad-4e67-b969-b3cc09303391.yml +53 -0
  32. 2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml +51 -0
  33. 2fe0c844-e98c-476d-b9a0-1a41beb91022.yml +50 -0
  34. 30e923d1-5b61-4748-ad75-b5645e8a66f9.yml +52 -0
  35. 356b1304-361f-49fb-bc60-43e6188679c1.yml +51 -0
  36. 36a43860-f8fa-4c32-afb5-c665be741dc4.yml +52 -0
  37. 3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml +51 -0
  38. 393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml +50 -0
  39. 39a09fca-d7db-49b6-9d31-760f252e1a05.yml +50 -0
  40. 3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml +51 -0
  41. 3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml +53 -0
  42. 3d940075-c928-42d9-8a10-07211af2fe5d.yml +50 -0
  43. 40f27435-f59d-488f-b2d6-01e356d79c48.yml +50 -0
  44. 419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml +50 -0
  45. 424a19bd-9a6f-4171-a205-745ee0bc1a03.yml +50 -0
  46. 427d02be-6008-4556-9a5e-9c7cb7503058.yml +51 -0
  47. 428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml +50 -0
  48. 44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml +53 -0
  49. 4524a254-d619-44de-addb-80d1a56f0224.yml +52 -0
  50. 4692c3b1-0351-4533-948d-ace8c76ceb1f.yml +50 -0
00000000-0000-0000-0000-000000000000.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: peft-internal-testing/tiny-dummy-qwen2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - format: custom
7
+ path: argilla/databricks-dolly-15k-curated-en
8
+ type:
9
+ field_input: original-instruction
10
+ field_instruction: original-instruction
11
+ field_output: original-response
12
+ format: '{instruction} {input}'
13
+ no_input_format: '{instruction}'
14
+ system_format: '{system}'
15
+ system_prompt: ''
16
+ eval_steps: 20
17
+ flash_attention: true
18
+ gpu_memory_limit: 80GiB
19
+ gradient_checkpointing: true
20
+ group_by_length: true
21
+ hub_model_id: SystemAdmin123/test-repo
22
+ hub_strategy: checkpoint
23
+ learning_rate: 0.0002
24
+ logging_steps: 10
25
+ lr_scheduler: cosine
26
+ micro_batch_size: 19
27
+ model_type: AutoModelForCausalLM
28
+ num_epochs: 10
29
+ optimizer: adamw_bnb_8bit
30
+ output_dir: /workspace/axolotl/configs
31
+ pad_to_sequence_len: true
32
+ resize_token_embeddings_to_32x: false
33
+ sample_packing: false
34
+ save_steps: 40
35
+ save_total_limit: 1
36
+ sequence_len: 2048
37
+ tokenizer_type: Qwen2TokenizerFast
38
+ train_on_inputs: false
39
+ trust_remote_code: true
40
+ val_set_size: 0.1
41
+ wandb_entity: ''
42
+ wandb_mode: online
43
+ wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
44
+ wandb_project: Gradients-On-Demand
45
+ wandb_run: your_name
46
+ wandb_runid: default
47
+ warmup_ratio: 0.05
48
+ xformers_attention: true
00000000-0000-0000-0000-000000000001.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - format: custom
7
+ path: argilla/databricks-dolly-15k-curated-en
8
+ type:
9
+ field_input: original-instruction
10
+ field_instruction: original-instruction
11
+ field_output: original-response
12
+ format: '{instruction} {input}'
13
+ no_input_format: '{instruction}'
14
+ system_format: '{system}'
15
+ system_prompt: ''
16
+ eval_steps: 20
17
+ flash_attention: true
18
+ gpu_memory_limit: 80GiB
19
+ gradient_checkpointing: true
20
+ group_by_length: true
21
+ hub_model_id: SystemAdmin123/test-repo
22
+ hub_strategy: checkpoint
23
+ learning_rate: 0.0002
24
+ logging_steps: 10
25
+ lr_scheduler: cosine
26
+ max_steps: 2500
27
+ micro_batch_size: 4
28
+ model_type: AutoModelForCausalLM
29
+ optimizer: adamw_bnb_8bit
30
+ output_dir: /workspace/axolotl/configs
31
+ pad_to_sequence_len: true
32
+ resize_token_embeddings_to_32x: false
33
+ sample_packing: false
34
+ save_steps: 40
35
+ save_total_limit: 1
36
+ sequence_len: 2048
37
+ tokenizer_type: Qwen2TokenizerFast
38
+ train_on_inputs: false
39
+ trust_remote_code: true
40
+ val_set_size: 0.1
41
+ wandb_entity: ''
42
+ wandb_mode: online
43
+ wandb_name: Qwen/Qwen2-7B-Instruct-argilla/databricks-dolly-15k-curated-en
44
+ wandb_project: Gradients-On-Demand
45
+ wandb_run: your_name
46
+ wandb_runid: default
47
+ warmup_ratio: 0.05
48
+ xformers_attention: true
00000000-0000-0000-0000-000000000123.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: fxmarty/small-llama-testing
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - format: custom
7
+ path: argilla/databricks-dolly-15k-curated-en
8
+ type:
9
+ field_input: original-instruction
10
+ field_instruction: original-instruction
11
+ field_output: original-response
12
+ format: '{instruction} {input}'
13
+ no_input_format: '{instruction}'
14
+ system_format: '{system}'
15
+ system_prompt: ''
16
+ eval_steps: 20
17
+ flash_attention: true
18
+ gpu_memory_limit: 80GiB
19
+ gradient_checkpointing: true
20
+ group_by_length: true
21
+ hub_model_id: SystemAdmin123/test-repo
22
+ hub_strategy: checkpoint
23
+ learning_rate: 0.0002
24
+ logging_steps: 10
25
+ lr_scheduler: cosine
26
+ micro_batch_size: 19
27
+ model_type: AutoModelForCausalLM
28
+ num_epochs: 10
29
+ optimizer: adamw_bnb_8bit
30
+ output_dir: /workspace/axolotl/configs
31
+ pad_to_sequence_len: true
32
+ resize_token_embeddings_to_32x: false
33
+ sample_packing: false
34
+ save_steps: 40
35
+ save_total_limit: 1
36
+ sequence_len: 2048
37
+ special_tokens:
38
+ pad_token: </s>
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: fxmarty/small-llama-testing-argilla/databricks-dolly-15k-curated-en
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
0017f0db-d7f8-4f78-9dd4-effb388c566e.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2.5-Math-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - e379af78ad5f53fa_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/e379af78ad5f53fa_train_data.json
11
+ type:
12
+ field_instruction: SeedRule
13
+ field_output: prompt
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/1b230251-8147-4d9a-b940-904d5e878a9d
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 2
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/e379af78ad5f53fa_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
01b60291-41f3-4631-b7e8-f7c60c2ca163.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: huggyllama/llama-7b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ccd32583f980ebf0_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ccd32583f980ebf0_train_data.json
11
+ type:
12
+ field_input: ''
13
+ field_instruction: problem
14
+ field_output: solution
15
+ format: '{instruction}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/fa36bf4c-34a6-4e51-ae14-a8372bf92b39
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: </s>
42
+ tokenizer_type: LlamaTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: huggyllama/llama-7b-/workspace/input_data/ccd32583f980ebf0_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: fxmarty/tiny-llama-fast-tokenizer
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - fc6136aac03f618a_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/fc6136aac03f618a_train_data.json
11
+ type:
12
+ field_instruction: text
13
+ field_output: title
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
0573e27c-6a6f-4989-9b30-ba29f333396d.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: dltjdgh0928/test_instruction
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 76022a30315552b8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/76022a30315552b8_train_data.json
11
+ type:
12
+ field_instruction: input
13
+ field_output: target
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/c32e2a6e-d46b-44d8-953f-917301378acd
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: dltjdgh0928/test_instruction-/workspace/input_data/76022a30315552b8_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/mistral-7b-v0.3
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ca0152973425c947_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ca0152973425c947_train_data.json
11
+ type:
12
+ field_input: code
13
+ field_instruction: func_name
14
+ field_output: docstring
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
078ba83d-394d-4561-8dc4-25f7f5d342f4.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: numind/NuExtract-v1.5
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - b23f426a106701fe_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/b23f426a106701fe_train_data.json
11
+ type:
12
+ field_instruction: question
13
+ field_output: answer
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/53cbcb68-02e6-4a0d-ae57-14c9018f7e1d
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: numind/NuExtract-v1.5-/workspace/input_data/b23f426a106701fe_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: facebook/opt-350m
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 5502b08b9b3b41f5_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/5502b08b9b3b41f5_train_data.json
11
+ type:
12
+ field_input: function
13
+ field_instruction: cwe_description
14
+ field_output: cve_description
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/ea3910f1-2762-4e77-bf49-50261890a7c3
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 19
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GPT2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: facebook/opt-350m-/tmp/5502b08b9b3b41f5_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2-0.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 86ba74df94452c98_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/86ba74df94452c98_train_data.json
11
+ type:
12
+ field_instruction: instruction
13
+ field_output: output
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/6f0d89e4-45b1-4dc8-877b-bec29de112de
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Qwen2-0.5B-Instruct-/workspace/input_data/86ba74df94452c98_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
102906fb-e0c6-458b-91ed-ea75a39fdc12.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Phi-3.5-mini-instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 286ecee6d914760d_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/286ecee6d914760d_train_data.json
11
+ type:
12
+ field_instruction: ja
13
+ field_output: en
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/df962b68-cd57-4f76-a085-6da16b65c260
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 19
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Phi-3.5-mini-instruct-/tmp/286ecee6d914760d_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
15b64bb1-e5f8-4728-878b-8b09852c0c75.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Eurdem/Defne_llama3_2x8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 235dcd465bd8663e_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/235dcd465bd8663e_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/4865f614-7ca8-416c-a631-a000a6af7a20
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 2
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: PreTrainedTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: Eurdem/Defne_llama3_2x8B-/tmp/235dcd465bd8663e_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
19c8688e-ea72-45f4-ad76-056d1e3fe378.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: codellama/CodeLlama-7b-Instruct-hf
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 0af14c27ef012868_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/0af14c27ef012868_train_data.json
11
+ type:
12
+ field_input: text
13
+ field_instruction: subject
14
+ field_output: title
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/69c5f55b-dddc-4b99-a936-751161256f95
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: </s>
42
+ tokenizer_type: CodeLlamaTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: codellama/CodeLlama-7b-Instruct-hf-/workspace/input_data/0af14c27ef012868_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: jhflow/mistral7b-lora-multi-turn-v2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 77a0df1c69074e81_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/77a0df1c69074e81_train_data.json
11
+ type:
12
+ field_input: keyword
13
+ field_instruction: abstract
14
+ field_output: title
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/0c660dfd-6e21-4c2b-9403-0c4b6ad1bf52
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 2
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: jhflow/mistral7b-lora-multi-turn-v2-/tmp/77a0df1c69074e81_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
1dc178e8-8f66-48ae-8ebb-825428c168d0.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Yarn-Mistral-7b-64k
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - bccab6bcbcb6fc03_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/bccab6bcbcb6fc03_train_data.json
11
+ type:
12
+ field_input: choices
13
+ field_instruction: full_prompt
14
+ field_output: example
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/93085afc-6d0b-49ca-ac4a-839ea57462a9
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: </s>
42
+ tokenizer_type: LlamaTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Yarn-Mistral-7b-64k-/workspace/input_data/bccab6bcbcb6fc03_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: fxmarty/tiny-dummy-qwen2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - c0a032ebb939dd62_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/c0a032ebb939dd62_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: chosen
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/da53a378-c2cd-4060-af7d-31ee77ad94d8
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 19
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: fxmarty/tiny-dummy-qwen2-/tmp/c0a032ebb939dd62_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
1ead3944-f2ff-490c-8e71-0e1cf0736354.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Meta-Llama-3-8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 079da9e23dfe7fea_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/079da9e23dfe7fea_train_data.json
11
+ type:
12
+ field_instruction: filename
13
+ field_output: title
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/3cb570bf-4ffe-411e-86e0-ad8b9bf19f1b
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: <|end_of_text|>
41
+ tokenizer_type: PreTrainedTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: NousResearch/Meta-Llama-3-8B-/workspace/input_data/079da9e23dfe7fea_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
21315ae5-16ee-43cd-9612-743524060933.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Meta-Llama-3.1-8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 562fa3aeea07046a_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/562fa3aeea07046a_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: text
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: PreTrainedTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
2135dd61-bfbc-4980-8694-eb2672292c86.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: peft-internal-testing/tiny-dummy-qwen2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 7041e6a7c6976935_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/7041e6a7c6976935_train_data.json
11
+ type:
12
+ field_instruction: Sequence
13
+ field_output: Secondary_structure
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/aa07d7d0-0e68-4d05-a943-be6d5a82bcf5
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 19
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: peft-internal-testing/tiny-dummy-qwen2-/tmp/7041e6a7c6976935_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
2311c30b-5869-4555-8890-0c7c01a7e6d8.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2.5-Math-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 63970f9aa6060c87_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/63970f9aa6060c87_train_data.json
11
+ type:
12
+ field_instruction: question
13
+ field_output: answer
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/e48fbbf8-54f7-401b-bc8f-38d51942e140
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/workspace/input_data/63970f9aa6060c87_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
233e1171-06fa-47f5-a61c-f0a283fd0346.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/codegemma-2b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 5c38c9685f2d92d6_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/5c38c9685f2d92d6_train_data.json
11
+ type:
12
+ field_instruction: problem
13
+ field_output: solution
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/5c2b97fc-05d6-480b-a036-352bd82cc8ed
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 9
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: GemmaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/codegemma-2b-/tmp/5c38c9685f2d92d6_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
2593284e-8d2b-49a4-9d90-a5407a2dda74.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: lmsys/vicuna-7b-v1.3
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 035ee0afc5220cd9_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/035ee0afc5220cd9_train_data.json
11
+ type:
12
+ field_input: algo_name
13
+ field_instruction: question
14
+ field_output: answer
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/e3a35688-e9b7-46d1-8f93-172a66f78e04
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 2
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: lmsys/vicuna-7b-v1.3-/tmp/035ee0afc5220cd9_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
260bfc16-ba7e-4e28-99db-8a240db68244.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/codegemma-7b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - c42c79a730cf3f73_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/c42c79a730cf3f73_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: chosen
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/4ef7ad27-7fca-469a-97eb-ef5effb6d888
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 2
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: GemmaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/codegemma-7b-/tmp/c42c79a730cf3f73_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: bigscience/bloomz-560m
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - a6a22929b7211ec8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/a6a22929b7211ec8_train_data.json
11
+ type:
12
+ field_input: privacy_mask
13
+ field_instruction: masked_text
14
+ field_output: unmasked_text
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/275c513f-c7ea-4c16-b6eb-7e9453a6a0ac
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 19
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: BloomTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: bigscience/bloomz-560m-/tmp/a6a22929b7211ec8_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
284c3982-7bc3-4e42-a78c-849f03798c5f.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 3e306f9221b79797_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/3e306f9221b79797_train_data.json
11
+ type:
12
+ field_input: dialogue
13
+ field_instruction: rendered_input
14
+ field_output: summary
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/f3c9c6cc-5806-45b4-aab9-d03de6022b3a
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/Qwen2-7B-Instruct-/workspace/input_data/3e306f9221b79797_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
29594d3f-2fa3-437a-80c0-be13835d5ddd.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - a720a9d4bd31efdf_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/a720a9d4bd31efdf_train_data.json
11
+ type:
12
+ field_input: context
13
+ field_instruction: question
14
+ field_output: final_decision
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/71efd339-65b7-418d-8796-5d4030d63c4a
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 19
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762-/tmp/a720a9d4bd31efdf_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
29e89a2c-6136-48b6-88bc-a0066652be7d.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: JackFram/llama-68m
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ff3a521d02fa72b2_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ff3a521d02fa72b2_train_data.json
11
+ type:
12
+ field_instruction: context
13
+ field_output: question
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/4ada8092-cc1e-445c-9260-a580ef2586ae
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: JackFram/llama-68m-/workspace/input_data/ff3a521d02fa72b2_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2-0.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 4c9e3de49f6146d7_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/4c9e3de49f6146d7_train_data.json
11
+ type:
12
+ field_instruction: instruction
13
+ field_output: solution
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/92a2f682-b071-4b7f-b1ad-9d92ad248d58
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2-0.5B-Instruct-/workspace/input_data/4c9e3de49f6146d7_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Llama-3.2-1B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f51beb4c568b9128_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f51beb4c568b9128_train_data.json
11
+ type:
12
+ field_input: keywords
13
+ field_instruction: idea
14
+ field_output: full_response
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/0c2649cc-2fe7-4e88-b672-6da1fee4001f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/f51beb4c568b9128_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
2db36e53-9fad-4e67-b969-b3cc09303391.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Llama-3.2-1B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - bb5c3bd8ee309eb0_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/bb5c3bd8ee309eb0_train_data.json
11
+ type:
12
+ field_input: system_prompt
13
+ field_instruction: question
14
+ field_output: response
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/60e7a811-5939-479a-aa70-b97de53f693d
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 19
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Llama-3.2-1B-/tmp/bb5c3bd8ee309eb0_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: katuni4ka/tiny-random-qwen1.5-moe
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 95544452e61c7393_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/95544452e61c7393_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/e61e89f0-854a-4922-8d25-dae435e91af0
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: katuni4ka/tiny-random-qwen1.5-moe-/workspace/input_data/95544452e61c7393_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
2fe0c844-e98c-476d-b9a0-1a41beb91022.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2.5-Math-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - d16d347b651ede3e_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/d16d347b651ede3e_train_data.json
11
+ type:
12
+ field_instruction: aspect_list
13
+ field_output: caption_summary
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/d2eafa75-a7f1-408a-a817-38be914edc2d
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Qwen2.5-Math-1.5B-Instruct-/workspace/input_data/d16d347b651ede3e_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
30e923d1-5b61-4748-ad75-b5645e8a66f9.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 8cea1b501202bc61_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/8cea1b501202bc61_train_data.json
11
+ type:
12
+ field_instruction: principle
13
+ field_output: instruction
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/4797a41f-d00c-44df-82e0-f23102492c0b
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 2
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: <|eot_id|>
41
+ tokenizer_type: PreTrainedTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct-/tmp/8cea1b501202bc61_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
356b1304-361f-49fb-bc60-43e6188679c1.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: scb10x/llama-3-typhoon-v1.5-8b-instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 3d40e8d1c58c211e_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/3d40e8d1c58c211e_train_data.json
11
+ type:
12
+ field_input: abstract
13
+ field_instruction: title
14
+ field_output: target
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/105663aa-ca91-474c-9030-957c22705518
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: PreTrainedTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: scb10x/llama-3-typhoon-v1.5-8b-instruct-/tmp/3d40e8d1c58c211e_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
36a43860-f8fa-4c32-afb5-c665be741dc4.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Yarn-Solar-10b-32k
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 7bb5c8c129066fca_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/7bb5c8c129066fca_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: chosen
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/d02ea0f4-0dee-4d3c-9f12-743ce16384f1
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 1
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: NousResearch/Yarn-Solar-10b-32k-/tmp/7bb5c8c129066fca_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: furiosa-ai/mlperf-gpt-j-6b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 80b3f2b5f3ce3209_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/80b3f2b5f3ce3209_train_data.json
11
+ type:
12
+ field_input: headline_a
13
+ field_instruction: rendered_input
14
+ field_output: headline_b
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/5e183a09-3809-4b42-97f6-0e567d3b687b
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 3
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GPT2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: furiosa-ai/mlperf-gpt-j-6b-/tmp/80b3f2b5f3ce3209_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/SmolLM-360M
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - a84f26482cb377ef_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/a84f26482cb377ef_train_data.json
11
+ type:
12
+ field_instruction: related_work
13
+ field_output: abstract
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/74db94dc-a2aa-4bd1-842c-ee6a65969df2
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: GPT2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/SmolLM-360M-/workspace/input_data/a84f26482cb377ef_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
39a09fca-d7db-49b6-9d31-760f252e1a05.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ae60825aae6334e4_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ae60825aae6334e4_train_data.json
11
+ type:
12
+ field_instruction: instruction
13
+ field_output: output
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/8e5e035b-ad91-4c67-a2c7-1a524b7cc337
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 2
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: GemmaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2-/tmp/ae60825aae6334e4_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f14707e620deedc0_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f14707e620deedc0_train_data.json
11
+ type:
12
+ field_input: problem
13
+ field_instruction: prompt
14
+ field_output: solution
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/1d2ad8c0-863e-4389-8dcc-4c05af9b0cfd
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 3
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/tmp/f14707e620deedc0_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Meta-Llama-3-8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 932456af08c12528_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/932456af08c12528_train_data.json
11
+ type:
12
+ field_input: genres
13
+ field_instruction: primaryTitle
14
+ field_output: text
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: SystemAdmin123/66a854ff-b78c-45c7-abb2-367e9ad0a406
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ micro_batch_size: 2
30
+ model_type: AutoModelForCausalLM
31
+ num_epochs: 10
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Meta-Llama-3-8B-/tmp/932456af08c12528_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
3d940075-c928-42d9-8a10-07211af2fe5d.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/codegemma-7b-it
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - a4ca6efb0404ddfb_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/a4ca6efb0404ddfb_train_data.json
11
+ type:
12
+ field_instruction: references
13
+ field_output: prompt
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/0c0e5d3f-1349-46d3-9234-4485480fad62
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: GemmaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/codegemma-7b-it-/workspace/input_data/a4ca6efb0404ddfb_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
40f27435-f59d-488f-b2d6-01e356d79c48.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - df925134bb2c32b8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/df925134bb2c32b8_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: amoral
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2.5-Math-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f58008447a01a2e1_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f58008447a01a2e1_train_data.json
11
+ type:
12
+ field_instruction: question
13
+ field_output: answer
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/0b0d8bdd-1df1-4e38-93cb-eb2f50e3e64a
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 2
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/f58008447a01a2e1_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
424a19bd-9a6f-4171-a205-745ee0bc1a03.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Mistral-Nemo-Instruct-2407
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 2ad9a6f08f511a85_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/2ad9a6f08f511a85_train_data.json
11
+ type:
12
+ field_instruction: question
13
+ field_output: reponses
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/2ba978bd-1973-4dbe-93d9-13ea0d52bfde
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 19
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: PreTrainedTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Mistral-Nemo-Instruct-2407-/tmp/2ad9a6f08f511a85_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
427d02be-6008-4556-9a5e-9c7cb7503058.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Phi-3.5-mini-instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 7e5b54272524b996_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/7e5b54272524b996_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: dltjdgh0928/test_instruction
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 7e1c4c23d4464ec4_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/7e1c4c23d4464ec4_train_data.json
11
+ type:
12
+ field_instruction: line
13
+ field_output: values
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/f921eed7-e3a9-4535-800a-6dca03a54aaa
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 19
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: dltjdgh0928/test_instruction-/tmp/7e1c4c23d4464ec4_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Llama-3.2-1B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 05366f42f2c8edcd_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/05366f42f2c8edcd_train_data.json
11
+ type:
12
+ field_input: schema
13
+ field_instruction: question
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/5a684b11-35f0-45eb-8ff7-2641074d853f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/05366f42f2c8edcd_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
4524a254-d619-44de-addb-80d1a56f0224.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: katuni4ka/tiny-random-falcon-40b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - b30c17b6a3700eb0_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/b30c17b6a3700eb0_train_data.json
11
+ type:
12
+ field_instruction: hieroglyphs
13
+ field_output: translation
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/f64de308-5d39-4088-bb11-f9b890e56369
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 1
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: <|endoftext|>
41
+ tokenizer_type: PreTrainedTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: katuni4ka/tiny-random-falcon-40b-/tmp/b30c17b6a3700eb0_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
4692c3b1-0351-4533-948d-ace8c76ceb1f.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2.5-0.5B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - e79aa413a56fb417_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/e79aa413a56fb417_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: chosen
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: SystemAdmin123/c9c706d7-4304-4009-a2b2-f6bd643f09f1
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ micro_batch_size: 3
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 10
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Qwen2.5-0.5B-/tmp/e79aa413a56fb417_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true