willtensora committed on
Commit
8845cef
·
verified ·
1 Parent(s): 04e8156

Training in progress, step 40

Browse files
Files changed (43) hide show
  1. .gitattributes +1 -0
  2. 00000000-0000-0000-0000-000000000000.yml +50 -0
  3. 01b60291-41f3-4631-b7e8-f7c60c2ca163.yml +53 -0
  4. 03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml +52 -0
  5. 0573e27c-6a6f-4989-9b30-ba29f333396d.yml +50 -0
  6. 077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml +51 -0
  7. 1dc178e8-8f66-48ae-8ebb-825428c168d0.yml +53 -0
  8. 21315ae5-16ee-43cd-9612-743524060933.yml +50 -0
  9. 284c3982-7bc3-4e42-a78c-849f03798c5f.yml +51 -0
  10. 29e89a2c-6136-48b6-88bc-a0066652be7d.yml +52 -0
  11. 2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml +53 -0
  12. 2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml +51 -0
  13. 40f27435-f59d-488f-b2d6-01e356d79c48.yml +50 -0
  14. 427d02be-6008-4556-9a5e-9c7cb7503058.yml +51 -0
  15. 54c39bbc-809b-4c67-a254-0e03a4884b4e.yml +51 -0
  16. 5b6d979e-5f1d-47f4-a5d3-c1026b8550e5.yml +51 -0
  17. 5ff7bf5f-96dc-43dd-aeeb-560c0ab78db8.yml +51 -0
  18. 63345f8a-4ec9-47f0-9956-6eaa52b2c2a6.yml +52 -0
  19. 6c7ae056-3b4d-460b-ba7b-a4000f32b3f1.yml +51 -0
  20. 75b21ca4-feab-4bdd-92b0-ea6d90dfa18f.yml +51 -0
  21. 7a4a0d08-b201-4939-999e-8cad606c5cdd.yml +50 -0
  22. 879db250-c3f5-4d43-a7c5-c5a456ae5803.yml +50 -0
  23. 8910478d-79cf-499e-8fed-7a2142f7ee60.yml +51 -0
  24. README.md +139 -0
  25. added_tokens.json +5 -0
  26. ba646963-47d5-4d28-bb73-74fd1aef7feb.yml +50 -0
  27. c6d606c5-1bf1-4d46-8f27-e3893d012d1d.yml +50 -0
  28. config.json +28 -0
  29. da9e44b3-e4fb-4905-9c7c-6b03aad6b593.yml +51 -0
  30. e51d7c64-39d2-4079-b777-8892db299c2a.yml +51 -0
  31. ee62f35d-1a99-4f1c-a69c-c91bc444b71f.yml +53 -0
  32. ef61f40b-eca8-4670-964b-fdd3d1d0f066.yml +51 -0
  33. fb974fc0-f90c-40fd-bd22-46669f42b395.yml +51 -0
  34. generation_config.json +7 -0
  35. merges.txt +0 -0
  36. model.safetensors +3 -0
  37. pytorch_model.bin +3 -0
  38. special_tokens_map.json +20 -0
  39. tokenizer.json +3 -0
  40. tokenizer.model +3 -0
  41. tokenizer_config.json +44 -0
  42. training_args.bin +3 -0
  43. vocab.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
00000000-0000-0000-0000-000000000000.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: peft-internal-testing/tiny-dummy-qwen2
2
+ batch_size: 8
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - format: custom
7
+ path: argilla/databricks-dolly-15k-curated-en
8
+ type:
9
+ field_input: original-instruction
10
+ field_instruction: original-instruction
11
+ field_output: original-response
12
+ format: '{instruction} {input}'
13
+ no_input_format: '{instruction}'
14
+ system_format: '{system}'
15
+ system_prompt: ''
16
+ eval_steps: 20
17
+ flash_attention: true
18
+ gpu_memory_limit: 80GiB
19
+ gradient_checkpointing: true
20
+ group_by_length: true
21
+ hub_model_id: willtensora/test-repo
22
+ hub_strategy: checkpoint
23
+ learning_rate: 0.002
24
+ load_best_model_at_end: true
25
+ logging_steps: 10
26
+ lr_scheduler: cosine
27
+ max_steps: 1
28
+ micro_batch_size: 1
29
+ model_type: AutoModelForCausalLM
30
+ num_epochs: 100
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 8
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.001
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
01b60291-41f3-4631-b7e8-f7c60c2ca163.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: huggyllama/llama-7b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ccd32583f980ebf0_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ccd32583f980ebf0_train_data.json
11
+ type:
12
+ field_input: ''
13
+ field_instruction: problem
14
+ field_output: solution
15
+ format: '{instruction}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/fa36bf4c-34a6-4e51-ae14-a8372bf92b39
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: </s>
42
+ tokenizer_type: LlamaTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: huggyllama/llama-7b-/workspace/input_data/ccd32583f980ebf0_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: fxmarty/tiny-llama-fast-tokenizer
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - fc6136aac03f618a_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/fc6136aac03f618a_train_data.json
11
+ type:
12
+ field_instruction: text
13
+ field_output: title
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
0573e27c-6a6f-4989-9b30-ba29f333396d.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: dltjdgh0928/test_instruction
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 76022a30315552b8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/76022a30315552b8_train_data.json
11
+ type:
12
+ field_instruction: input
13
+ field_output: target
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/c32e2a6e-d46b-44d8-953f-917301378acd
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: dltjdgh0928/test_instruction-/workspace/input_data/76022a30315552b8_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/mistral-7b-v0.3
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ca0152973425c947_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ca0152973425c947_train_data.json
11
+ type:
12
+ field_input: code
13
+ field_instruction: func_name
14
+ field_output: docstring
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
1dc178e8-8f66-48ae-8ebb-825428c168d0.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Yarn-Mistral-7b-64k
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - bccab6bcbcb6fc03_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/bccab6bcbcb6fc03_train_data.json
11
+ type:
12
+ field_input: choices
13
+ field_instruction: full_prompt
14
+ field_output: example
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/93085afc-6d0b-49ca-ac4a-839ea57462a9
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: </s>
42
+ tokenizer_type: LlamaTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Yarn-Mistral-7b-64k-/workspace/input_data/bccab6bcbcb6fc03_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
21315ae5-16ee-43cd-9612-743524060933.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Meta-Llama-3.1-8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 562fa3aeea07046a_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/562fa3aeea07046a_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: text
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: PreTrainedTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
284c3982-7bc3-4e42-a78c-849f03798c5f.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2-7B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 3e306f9221b79797_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/3e306f9221b79797_train_data.json
11
+ type:
12
+ field_input: dialogue
13
+ field_instruction: rendered_input
14
+ field_output: summary
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/f3c9c6cc-5806-45b4-aab9-d03de6022b3a
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/Qwen2-7B-Instruct-/workspace/input_data/3e306f9221b79797_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
29e89a2c-6136-48b6-88bc-a0066652be7d.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: JackFram/llama-68m
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - ff3a521d02fa72b2_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/ff3a521d02fa72b2_train_data.json
11
+ type:
12
+ field_instruction: context
13
+ field_output: question
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/4ada8092-cc1e-445c-9260-a580ef2586ae
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: JackFram/llama-68m-/workspace/input_data/ff3a521d02fa72b2_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Llama-3.2-1B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f51beb4c568b9128_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f51beb4c568b9128_train_data.json
11
+ type:
12
+ field_input: keywords
13
+ field_instruction: idea
14
+ field_output: full_response
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/0c2649cc-2fe7-4e88-b672-6da1fee4001f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|end_of_text|>
42
+ tokenizer_type: PreTrainedTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/f51beb4c568b9128_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: katuni4ka/tiny-random-qwen1.5-moe
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 95544452e61c7393_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/95544452e61c7393_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/e61e89f0-854a-4922-8d25-dae435e91af0
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: katuni4ka/tiny-random-qwen1.5-moe-/workspace/input_data/95544452e61c7393_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
40f27435-f59d-488f-b2d6-01e356d79c48.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - df925134bb2c32b8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/df925134bb2c32b8_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: amoral
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
427d02be-6008-4556-9a5e-9c7cb7503058.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Phi-3.5-mini-instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 7e5b54272524b996_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/7e5b54272524b996_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
54c39bbc-809b-4c67-a254-0e03a4884b4e.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/codegemma-7b-it
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 2ebe89763cb3150d_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/2ebe89763cb3150d_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/a0bc38f8-fcd3-4d7e-9a3f-3aa2e8a4204f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GemmaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/codegemma-7b-it-/tmp/2ebe89763cb3150d_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
5b6d979e-5f1d-47f4-a5d3-c1026b8550e5.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: princeton-nlp/gemma-2-9b-it-SimPO
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - dbe5c72dde5e5bcb_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/dbe5c72dde5e5bcb_train_data.json
11
+ type:
12
+ field_input: essay
13
+ field_instruction: prompt
14
+ field_output: evaluation
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/1c0e6767-6440-4997-87d3-2298e513b046
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 2
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GemmaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: princeton-nlp/gemma-2-9b-it-SimPO-/workspace/input_data/dbe5c72dde5e5bcb_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
5ff7bf5f-96dc-43dd-aeeb-560c0ab78db8.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/Hermes-3-Llama-3.1-8B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 69447058613b41d8_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/69447058613b41d8_train_data.json
11
+ type:
12
+ field_input: sectionParentTitre
13
+ field_instruction: title_main
14
+ field_output: texte
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/942aa5fc-b540-46ce-b482-e38c4f637264
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: PreTrainedTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: NousResearch/Hermes-3-Llama-3.1-8B-/workspace/input_data/69447058613b41d8_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
63345f8a-4ec9-47f0-9956-6eaa52b2c2a6.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: heegyu/WizardVicuna-open-llama-3b-v2
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - fe9267419ea75ad2_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/fe9267419ea75ad2_train_data.json
11
+ type:
12
+ field_instruction: ca_topic
13
+ field_output: article
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/7114c34f-852f-43da-b985-b7f0b6d6d724
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ special_tokens:
40
+ pad_token: </s>
41
+ tokenizer_type: LlamaTokenizerFast
42
+ train_on_inputs: false
43
+ trust_remote_code: true
44
+ val_set_size: 0.1
45
+ wandb_entity: ''
46
+ wandb_mode: online
47
+ wandb_name: heegyu/WizardVicuna-open-llama-3b-v2-/tmp/fe9267419ea75ad2_train_data.json
48
+ wandb_project: Gradients-On-Demand
49
+ wandb_run: your_name
50
+ wandb_runid: default
51
+ warmup_ratio: 0.05
52
+ xformers_attention: true
6c7ae056-3b4d-460b-ba7b-a4000f32b3f1.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/gemma-2-2b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - b98d5b59c20c6595_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/b98d5b59c20c6595_train_data.json
11
+ type:
12
+ field_input: metadata
13
+ field_instruction: text
14
+ field_output: tags_str
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/429ee307-6dd2-4dd7-9e1d-7384d807a3df
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GemmaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/gemma-2-2b-/tmp/b98d5b59c20c6595_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
75b21ca4-feab-4bdd-92b0-ea6d90dfa18f.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - c6adcdcb593a3ee4_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/c6adcdcb593a3ee4_train_data.json
11
+ type:
12
+ field_input: abstract
13
+ field_instruction: question_en_origin
14
+ field_output: answer_en_origin
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/2faf844e-4a0a-4d23-95f4-a055e4864133
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/workspace/input_data/c6adcdcb593a3ee4_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
7a4a0d08-b201-4939-999e-8cad606c5cdd.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NousResearch/GPT4-x-Vicuna-13b-fp16
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - dcf32f9d35bdd1f9_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/dcf32f9d35bdd1f9_train_data.json
11
+ type:
12
+ field_instruction: doc_text
13
+ field_output: summary_text
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/009ef170-2771-4ab8-8e1b-9a9d2a2e1e2b
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 2
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: NousResearch/GPT4-x-Vicuna-13b-fp16-/workspace/input_data/dcf32f9d35bdd1f9_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
879db250-c3f5-4d43-a7c5-c5a456ae5803.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2.5-Coder-1.5B-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 4d85b564dafa38db_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/4d85b564dafa38db_train_data.json
11
+ type:
12
+ field_instruction: prompt
13
+ field_output: response
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/876ff803-5357-4240-8766-c54166515403
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Qwen2.5-Coder-1.5B-Instruct-/workspace/input_data/4d85b564dafa38db_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
8910478d-79cf-499e-8fed-7a2142f7ee60.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Phi-3-medium-4k-instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f6199f34ade98809_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f6199f34ade98809_train_data.json
11
+ type:
12
+ field_input: choices
13
+ field_instruction: question
14
+ field_output: answer
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/2d37ba50-cd70-4895-be62-3477f5193e86
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: LlamaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/Phi-3-medium-4k-instruct-/tmp/f6199f34ade98809_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: llama3.2
4
+ base_model: NousResearch/Llama-3.2-1B
5
+ tags:
6
+ - axolotl
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: 0c2649cc-2fe7-4e88-b672-6da1fee4001f
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
17
+ <details><summary>See axolotl config</summary>
18
+
19
+ axolotl version: `0.4.1`
20
+ ```yaml
21
+ base_model: NousResearch/Llama-3.2-1B
22
+ batch_size: 32
23
+ bf16: true
24
+ chat_template: tokenizer_default_fallback_alpaca
25
+ datasets:
26
+ - data_files:
27
+ - f51beb4c568b9128_train_data.json
28
+ ds_type: json
29
+ format: custom
30
+ path: /workspace/input_data/f51beb4c568b9128_train_data.json
31
+ type:
32
+ field_input: keywords
33
+ field_instruction: idea
34
+ field_output: full_response
35
+ format: '{instruction} {input}'
36
+ no_input_format: '{instruction}'
37
+ system_format: '{system}'
38
+ system_prompt: ''
39
+ eval_steps: 20
40
+ flash_attention: true
41
+ gpu_memory_limit: 80GiB
42
+ gradient_checkpointing: true
43
+ group_by_length: true
44
+ hub_model_id: willtensora/0c2649cc-2fe7-4e88-b672-6da1fee4001f
45
+ hub_strategy: checkpoint
46
+ learning_rate: 0.0002
47
+ logging_steps: 10
48
+ lr_scheduler: cosine
49
+ max_steps: 2500
50
+ micro_batch_size: 4
51
+ model_type: AutoModelForCausalLM
52
+ optimizer: adamw_bnb_8bit
53
+ output_dir: /workspace/axolotl/configs
54
+ pad_to_sequence_len: true
55
+ resize_token_embeddings_to_32x: false
56
+ sample_packing: false
57
+ save_steps: 40
58
+ save_total_limit: 1
59
+ sequence_len: 2048
60
+ special_tokens:
61
+ pad_token: <|end_of_text|>
62
+ tokenizer_type: PreTrainedTokenizerFast
63
+ train_on_inputs: false
64
+ trust_remote_code: true
65
+ val_set_size: 0.1
66
+ wandb_entity: ''
67
+ wandb_mode: online
68
+ wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/f51beb4c568b9128_train_data.json
69
+ wandb_project: Gradients-On-Demand
70
+ wandb_run: your_name
71
+ wandb_runid: default
72
+ warmup_ratio: 0.05
73
+ xformers_attention: true
74
+
75
+ ```
76
+
77
+ </details><br>
78
+
79
+ # 0c2649cc-2fe7-4e88-b672-6da1fee4001f
80
+
81
+ This model is a fine-tuned version of [NousResearch/Llama-3.2-1B](https://huggingface.co/NousResearch/Llama-3.2-1B) on the None dataset.
82
+ It achieves the following results on the evaluation set:
83
+ - Loss: 0.0849
84
+
85
+ ## Model description
86
+
87
+ More information needed
88
+
89
+ ## Intended uses & limitations
90
+
91
+ More information needed
92
+
93
+ ## Training and evaluation data
94
+
95
+ More information needed
96
+
97
+ ## Training procedure
98
+
99
+ ### Training hyperparameters
100
+
101
+ The following hyperparameters were used during training:
102
+ - learning_rate: 0.0002
103
+ - train_batch_size: 4
104
+ - eval_batch_size: 4
105
+ - seed: 42
106
+ - distributed_type: multi-GPU
107
+ - num_devices: 8
108
+ - total_train_batch_size: 32
109
+ - total_eval_batch_size: 32
110
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
111
+ - lr_scheduler_type: cosine
112
+ - lr_scheduler_warmup_steps: 12
113
+ - training_steps: 258
114
+
115
+ ### Training results
116
+
117
+ | Training Loss | Epoch | Step | Validation Loss |
118
+ |:-------------:|:------:|:----:|:---------------:|
119
+ | No log | 0.0005 | 1 | 0.2074 |
120
+ | 0.5472 | 0.0097 | 20 | 0.1746 |
121
+ | 0.3199 | 0.0194 | 40 | 0.2036 |
122
+ | 0.2013 | 0.0291 | 60 | 0.1772 |
123
+ | 0.0903 | 0.0388 | 80 | 0.1702 |
124
+ | 0.0875 | 0.0485 | 100 | 0.2040 |
125
+ | 0.1425 | 0.0582 | 120 | 0.1392 |
126
+ | 0.1982 | 0.0679 | 140 | 0.1194 |
127
+ | 0.1372 | 0.0776 | 160 | 0.1014 |
128
+ | 0.0278 | 0.0873 | 180 | 0.0952 |
129
+ | 0.0248 | 0.0970 | 200 | 0.0893 |
130
+ | 0.1051 | 0.1067 | 220 | 0.0875 |
131
+ | 0.0649 | 0.1164 | 240 | 0.0849 |
132
+
133
+
134
+ ### Framework versions
135
+
136
+ - Transformers 4.46.0
137
+ - Pytorch 2.5.0+cu124
138
+ - Datasets 3.0.1
139
+ - Tokenizers 0.20.1
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
ba646963-47d5-4d28-bb73-74fd1aef7feb.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/Qwen2-0.5B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 1c60ef1fa1ddd4a9_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/1c60ef1fa1ddd4a9_train_data.json
11
+ type:
12
+ field_instruction: en
13
+ field_output: es
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/054d8bb5-59eb-4c69-9472-ab1b71a92df6
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: Qwen2TokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: unsloth/Qwen2-0.5B-/workspace/input_data/1c60ef1fa1ddd4a9_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
c6d606c5-1bf1-4d46-8f27-e3893d012d1d.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: trl-internal-testing/tiny-random-LlamaForCausalLM
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f4a61305a746447c_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f4a61305a746447c_train_data.json
11
+ type:
12
+ field_instruction: sentence1
13
+ field_output: sentence2
14
+ format: '{instruction}'
15
+ no_input_format: '{instruction}'
16
+ system_format: '{system}'
17
+ system_prompt: ''
18
+ eval_steps: 20
19
+ flash_attention: true
20
+ gpu_memory_limit: 80GiB
21
+ gradient_checkpointing: true
22
+ group_by_length: true
23
+ hub_model_id: willtensora/dab16ec4-4ddf-4ee5-8888-3dc2a83f0f86
24
+ hub_strategy: checkpoint
25
+ learning_rate: 0.0002
26
+ logging_steps: 10
27
+ lr_scheduler: cosine
28
+ max_steps: 2500
29
+ micro_batch_size: 4
30
+ model_type: AutoModelForCausalLM
31
+ optimizer: adamw_bnb_8bit
32
+ output_dir: /workspace/axolotl/configs
33
+ pad_to_sequence_len: true
34
+ resize_token_embeddings_to_32x: false
35
+ sample_packing: false
36
+ save_steps: 40
37
+ save_total_limit: 1
38
+ sequence_len: 2048
39
+ tokenizer_type: LlamaTokenizerFast
40
+ train_on_inputs: false
41
+ trust_remote_code: true
42
+ val_set_size: 0.1
43
+ wandb_entity: ''
44
+ wandb_mode: online
45
+ wandb_name: trl-internal-testing/tiny-random-LlamaForCausalLM-/workspace/input_data/f4a61305a746447c_train_data.json
46
+ wandb_project: Gradients-On-Demand
47
+ wandb_run: your_name
48
+ wandb_runid: default
49
+ warmup_ratio: 0.05
50
+ xformers_attention: true
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Qwen/Qwen2-0.5B",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151643,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 896,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4864,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 24,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 14,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.46.0",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936
28
+ }
da9e44b3-e4fb-4905-9c7c-6b03aad6b593.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/SmolLM2-360M-Instruct
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - f1ccd02a885008e6_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/f1ccd02a885008e6_train_data.json
11
+ type:
12
+ field_input: target
13
+ field_instruction: user
14
+ field_output: assistant
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/3da0a03a-adbb-42e3-8fd7-bd7c0b1d3e9f
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GPT2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/SmolLM2-360M-Instruct-/tmp/f1ccd02a885008e6_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
e51d7c64-39d2-4079-b777-8892db299c2a.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: fxmarty/tiny-random-GemmaForCausalLM
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - b7c2a4a781c93416_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/b7c2a4a781c93416_train_data.json
11
+ type:
12
+ field_input: context
13
+ field_instruction: question
14
+ field_output: answer
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/fd1980a0-7e71-4e52-addb-318dca5991d5
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GemmaTokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: fxmarty/tiny-random-GemmaForCausalLM-/workspace/input_data/b7c2a4a781c93416_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
ee62f35d-1a99-4f1c-a69c-c91bc444b71f.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: EleutherAI/pythia-1b
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - b2a4966d9a5c880e_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/b2a4966d9a5c880e_train_data.json
11
+ type:
12
+ field_input: input
13
+ field_instruction: instruction
14
+ field_output: output
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/ee937811-31d0-4e11-944a-f4f8e06309d2
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ special_tokens:
41
+ pad_token: <|endoftext|>
42
+ tokenizer_type: GPTNeoXTokenizerFast
43
+ train_on_inputs: false
44
+ trust_remote_code: true
45
+ val_set_size: 0.1
46
+ wandb_entity: ''
47
+ wandb_mode: online
48
+ wandb_name: EleutherAI/pythia-1b-/workspace/input_data/b2a4966d9a5c880e_train_data.json
49
+ wandb_project: Gradients-On-Demand
50
+ wandb_run: your_name
51
+ wandb_runid: default
52
+ warmup_ratio: 0.05
53
+ xformers_attention: true
ef61f40b-eca8-4670-964b-fdd3d1d0f066.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: unsloth/SmolLM-135M
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 658988857b0a29c9_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/658988857b0a29c9_train_data.json
11
+ type:
12
+ field_input: choices
13
+ field_instruction: subject
14
+ field_output: question
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/09370687-f28e-45e5-91f6-f87011850a94
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: GPT2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: unsloth/SmolLM-135M-/workspace/input_data/658988857b0a29c9_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
fb974fc0-f90c-40fd-bd22-46669f42b395.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Qwen/Qwen2-0.5B
2
+ batch_size: 32
3
+ bf16: true
4
+ chat_template: tokenizer_default_fallback_alpaca
5
+ datasets:
6
+ - data_files:
7
+ - 745d2d05aaed18f4_train_data.json
8
+ ds_type: json
9
+ format: custom
10
+ path: /workspace/input_data/745d2d05aaed18f4_train_data.json
11
+ type:
12
+ field_input: pos
13
+ field_instruction: task
14
+ field_output: query
15
+ format: '{instruction} {input}'
16
+ no_input_format: '{instruction}'
17
+ system_format: '{system}'
18
+ system_prompt: ''
19
+ eval_steps: 20
20
+ flash_attention: true
21
+ gpu_memory_limit: 80GiB
22
+ gradient_checkpointing: true
23
+ group_by_length: true
24
+ hub_model_id: willtensora/459779f2-cbce-4ec0-b11c-1dcdf92498d8
25
+ hub_strategy: checkpoint
26
+ learning_rate: 0.0002
27
+ logging_steps: 10
28
+ lr_scheduler: cosine
29
+ max_steps: 2500
30
+ micro_batch_size: 4
31
+ model_type: AutoModelForCausalLM
32
+ optimizer: adamw_bnb_8bit
33
+ output_dir: /workspace/axolotl/configs
34
+ pad_to_sequence_len: true
35
+ resize_token_embeddings_to_32x: false
36
+ sample_packing: false
37
+ save_steps: 40
38
+ save_total_limit: 1
39
+ sequence_len: 2048
40
+ tokenizer_type: Qwen2TokenizerFast
41
+ train_on_inputs: false
42
+ trust_remote_code: true
43
+ val_set_size: 0.1
44
+ wandb_entity: ''
45
+ wandb_mode: online
46
+ wandb_name: Qwen/Qwen2-0.5B-/workspace/input_data/745d2d05aaed18f4_train_data.json
47
+ wandb_project: Gradients-On-Demand
48
+ wandb_run: your_name
49
+ wandb_runid: default
50
+ warmup_ratio: 0.05
51
+ xformers_attention: true
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "transformers_version": "4.46.0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a83931a9a94f9632379d50ef2a7c25f6c054134ce8e68739fdab1e8235bfc9b5
3
+ size 988097824
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a3488e39325dea60c21ab0cf3a2715d26192702fde06183582341380d5a328b
3
+ size 2471678226
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
3
+ size 11418266
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|endoftext|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "split_special_tokens": false,
41
+ "tokenizer_class": "Qwen2Tokenizer",
42
+ "unk_token": null,
43
+ "use_fast": true
44
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf26f85220d2e1869df4e13339d28deef8b39d73592068f8d91b2a230f2ba47
3
+ size 6648
vocab.json ADDED
The diff for this file is too large to render. See raw diff