SystemAdmin123 committed on Jan 27

Commit

e876591

verified ·

1 Parent(s): 36f10f5

Training in progress, step 40

Browse files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set.

Files changed (50) hide show

00000000-0000-0000-0000-000000000000.yml +48 -0
00000000-0000-0000-0000-000000000001.yml +48 -0
00000000-0000-0000-0000-000000000123.yml +50 -0
0017f0db-d7f8-4f78-9dd4-effb388c566e.yml +50 -0
01b60291-41f3-4631-b7e8-f7c60c2ca163.yml +53 -0
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml +52 -0
0573e27c-6a6f-4989-9b30-ba29f333396d.yml +50 -0
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml +51 -0
078ba83d-394d-4561-8dc4-25f7f5d342f4.yml +50 -0
08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml +51 -0
09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml +50 -0
102906fb-e0c6-458b-91ed-ea75a39fdc12.yml +50 -0
15b64bb1-e5f8-4728-878b-8b09852c0c75.yml +51 -0
19c8688e-ea72-45f4-ad76-056d1e3fe378.yml +53 -0
1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml +51 -0
1dc178e8-8f66-48ae-8ebb-825428c168d0.yml +53 -0
1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml +50 -0
1ead3944-f2ff-490c-8e71-0e1cf0736354.yml +52 -0
21315ae5-16ee-43cd-9612-743524060933.yml +50 -0
2135dd61-bfbc-4980-8694-eb2672292c86.yml +50 -0
2311c30b-5869-4555-8890-0c7c01a7e6d8.yml +50 -0
233e1171-06fa-47f5-a61c-f0a283fd0346.yml +50 -0
2593284e-8d2b-49a4-9d90-a5407a2dda74.yml +51 -0
260bfc16-ba7e-4e28-99db-8a240db68244.yml +50 -0
2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml +51 -0
284c3982-7bc3-4e42-a78c-849f03798c5f.yml +51 -0
29594d3f-2fa3-437a-80c0-be13835d5ddd.yml +53 -0
29e89a2c-6136-48b6-88bc-a0066652be7d.yml +52 -0
2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml +50 -0
2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml +53 -0
2db36e53-9fad-4e67-b969-b3cc09303391.yml +53 -0
2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml +51 -0
2fe0c844-e98c-476d-b9a0-1a41beb91022.yml +50 -0
30e923d1-5b61-4748-ad75-b5645e8a66f9.yml +52 -0
356b1304-361f-49fb-bc60-43e6188679c1.yml +51 -0
36a43860-f8fa-4c32-afb5-c665be741dc4.yml +52 -0
3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml +51 -0
393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml +50 -0
39a09fca-d7db-49b6-9d31-760f252e1a05.yml +50 -0
3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml +51 -0
3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml +53 -0
3d940075-c928-42d9-8a10-07211af2fe5d.yml +50 -0
40f27435-f59d-488f-b2d6-01e356d79c48.yml +50 -0
419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml +50 -0
424a19bd-9a6f-4171-a205-745ee0bc1a03.yml +50 -0
427d02be-6008-4556-9a5e-9c7cb7503058.yml +51 -0
428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml +50 -0
44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml +53 -0
4524a254-d619-44de-addb-80d1a56f0224.yml +52 -0
4692c3b1-0351-4533-948d-ace8c76ceb1f.yml +50 -0

00000000-0000-0000-0000-000000000000.yml ADDED Viewed

	@@ -0,0 +1,48 @@

+base_model: peft-internal-testing/tiny-dummy-qwen2
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- format: custom
+  path: argilla/databricks-dolly-15k-curated-en
+  type:
+    field_input: original-instruction
+    field_instruction: original-instruction
+    field_output: original-response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/test-repo
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

00000000-0000-0000-0000-000000000001.yml ADDED Viewed

	@@ -0,0 +1,48 @@

+base_model: Qwen/Qwen2-7B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- format: custom
+  path: argilla/databricks-dolly-15k-curated-en
+  type:
+    field_input: original-instruction
+    field_instruction: original-instruction
+    field_output: original-response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/test-repo
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2-7B-Instruct-argilla/databricks-dolly-15k-curated-en
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

00000000-0000-0000-0000-000000000123.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: fxmarty/small-llama-testing
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- format: custom
+  path: argilla/databricks-dolly-15k-curated-en
+  type:
+    field_input: original-instruction
+    field_instruction: original-instruction
+    field_output: original-response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/test-repo
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: fxmarty/small-llama-testing-argilla/databricks-dolly-15k-curated-en
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

0017f0db-d7f8-4f78-9dd4-effb388c566e.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: Qwen/Qwen2.5-Math-7B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - e379af78ad5f53fa_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/e379af78ad5f53fa_train_data.json
+  type:
+    field_instruction: SeedRule
+    field_output: prompt
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/1b230251-8147-4d9a-b940-904d5e878a9d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/e379af78ad5f53fa_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

01b60291-41f3-4631-b7e8-f7c60c2ca163.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: huggyllama/llama-7b
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - ccd32583f980ebf0_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/ccd32583f980ebf0_train_data.json
+  type:
+    field_input: ''
+    field_instruction: problem
+    field_output: solution
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/fa36bf4c-34a6-4e51-ae14-a8372bf92b39
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: huggyllama/llama-7b-/workspace/input_data/ccd32583f980ebf0_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: fxmarty/tiny-llama-fast-tokenizer
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - fc6136aac03f618a_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/fc6136aac03f618a_train_data.json
+  type:
+    field_instruction: text
+    field_output: title
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

0573e27c-6a6f-4989-9b30-ba29f333396d.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: dltjdgh0928/test_instruction
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 76022a30315552b8_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/76022a30315552b8_train_data.json
+  type:
+    field_instruction: input
+    field_output: target
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/c32e2a6e-d46b-44d8-953f-917301378acd
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: dltjdgh0928/test_instruction-/workspace/input_data/76022a30315552b8_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: unsloth/mistral-7b-v0.3
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - ca0152973425c947_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/ca0152973425c947_train_data.json
+  type:
+    field_input: code
+    field_instruction: func_name
+    field_output: docstring
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

078ba83d-394d-4561-8dc4-25f7f5d342f4.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: numind/NuExtract-v1.5
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - b23f426a106701fe_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/b23f426a106701fe_train_data.json
+  type:
+    field_instruction: question
+    field_output: answer
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/53cbcb68-02e6-4a0d-ae57-14c9018f7e1d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: numind/NuExtract-v1.5-/workspace/input_data/b23f426a106701fe_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

08edabf0-86e9-4b88-b7f8-7383b8455fd9.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: facebook/opt-350m
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 5502b08b9b3b41f5_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/5502b08b9b3b41f5_train_data.json
+  type:
+    field_input: function
+    field_instruction: cwe_description
+    field_output: cve_description
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/ea3910f1-2762-4e77-bf49-50261890a7c3
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GPT2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: facebook/opt-350m-/tmp/5502b08b9b3b41f5_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

09b9ec60-551f-42fb-a5bc-3bb998c6bcb7.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Qwen2-0.5B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 86ba74df94452c98_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/86ba74df94452c98_train_data.json
+  type:
+    field_instruction: instruction
+    field_output: output
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/6f0d89e4-45b1-4dc8-877b-bec29de112de
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Qwen2-0.5B-Instruct-/workspace/input_data/86ba74df94452c98_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

102906fb-e0c6-458b-91ed-ea75a39fdc12.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Phi-3.5-mini-instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 286ecee6d914760d_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/286ecee6d914760d_train_data.json
+  type:
+    field_instruction: ja
+    field_output: en
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/df962b68-cd57-4f76-a085-6da16b65c260
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Phi-3.5-mini-instruct-/tmp/286ecee6d914760d_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

15b64bb1-e5f8-4728-878b-8b09852c0c75.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: Eurdem/Defne_llama3_2x8B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 235dcd465bd8663e_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/235dcd465bd8663e_train_data.json
+  type:
+    field_input: input
+    field_instruction: instruction
+    field_output: output
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/4865f614-7ca8-416c-a631-a000a6af7a20
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Eurdem/Defne_llama3_2x8B-/tmp/235dcd465bd8663e_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

19c8688e-ea72-45f4-ad76-056d1e3fe378.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: codellama/CodeLlama-7b-Instruct-hf
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 0af14c27ef012868_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/0af14c27ef012868_train_data.json
+  type:
+    field_input: text
+    field_instruction: subject
+    field_output: title
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/69c5f55b-dddc-4b99-a936-751161256f95
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: CodeLlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: codellama/CodeLlama-7b-Instruct-hf-/workspace/input_data/0af14c27ef012868_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

1a0ae1a8-90b0-4064-a957-ec08b8a626a2.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: jhflow/mistral7b-lora-multi-turn-v2
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 77a0df1c69074e81_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/77a0df1c69074e81_train_data.json
+  type:
+    field_input: keyword
+    field_instruction: abstract
+    field_output: title
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/0c660dfd-6e21-4c2b-9403-0c4b6ad1bf52
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: jhflow/mistral7b-lora-multi-turn-v2-/tmp/77a0df1c69074e81_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

1dc178e8-8f66-48ae-8ebb-825428c168d0.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: NousResearch/Yarn-Mistral-7b-64k
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - bccab6bcbcb6fc03_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/bccab6bcbcb6fc03_train_data.json
+  type:
+    field_input: choices
+    field_instruction: full_prompt
+    field_output: example
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/93085afc-6d0b-49ca-ac4a-839ea57462a9
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Yarn-Mistral-7b-64k-/workspace/input_data/bccab6bcbcb6fc03_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

1e84d93c-fc85-4f62-a6d4-0e9ec4aefaa5.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: fxmarty/tiny-dummy-qwen2
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - c0a032ebb939dd62_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/c0a032ebb939dd62_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: chosen
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/da53a378-c2cd-4060-af7d-31ee77ad94d8
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: fxmarty/tiny-dummy-qwen2-/tmp/c0a032ebb939dd62_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

1ead3944-f2ff-490c-8e71-0e1cf0736354.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: NousResearch/Meta-Llama-3-8B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 079da9e23dfe7fea_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/079da9e23dfe7fea_train_data.json
+  type:
+    field_instruction: filename
+    field_output: title
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/3cb570bf-4ffe-411e-86e0-ad8b9bf19f1b
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Meta-Llama-3-8B-/workspace/input_data/079da9e23dfe7fea_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

21315ae5-16ee-43cd-9612-743524060933.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Meta-Llama-3.1-8B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 562fa3aeea07046a_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/562fa3aeea07046a_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: text
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2135dd61-bfbc-4980-8694-eb2672292c86.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: peft-internal-testing/tiny-dummy-qwen2
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 7041e6a7c6976935_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/7041e6a7c6976935_train_data.json
+  type:
+    field_instruction: Sequence
+    field_output: Secondary_structure
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/aa07d7d0-0e68-4d05-a943-be6d5a82bcf5
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: peft-internal-testing/tiny-dummy-qwen2-/tmp/7041e6a7c6976935_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2311c30b-5869-4555-8890-0c7c01a7e6d8.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: Qwen/Qwen2.5-Math-7B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 63970f9aa6060c87_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/63970f9aa6060c87_train_data.json
+  type:
+    field_instruction: question
+    field_output: answer
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/e48fbbf8-54f7-401b-bc8f-38d51942e140
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/workspace/input_data/63970f9aa6060c87_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

233e1171-06fa-47f5-a61c-f0a283fd0346.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/codegemma-2b
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 5c38c9685f2d92d6_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/5c38c9685f2d92d6_train_data.json
+  type:
+    field_instruction: problem
+    field_output: solution
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/5c2b97fc-05d6-480b-a036-352bd82cc8ed
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 9
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GemmaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/codegemma-2b-/tmp/5c38c9685f2d92d6_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2593284e-8d2b-49a4-9d90-a5407a2dda74.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: lmsys/vicuna-7b-v1.3
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 035ee0afc5220cd9_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/035ee0afc5220cd9_train_data.json
+  type:
+    field_input: algo_name
+    field_instruction: question
+    field_output: answer
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/e3a35688-e9b7-46d1-8f93-172a66f78e04
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: lmsys/vicuna-7b-v1.3-/tmp/035ee0afc5220cd9_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

260bfc16-ba7e-4e28-99db-8a240db68244.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/codegemma-7b
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - c42c79a730cf3f73_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/c42c79a730cf3f73_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: chosen
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/4ef7ad27-7fca-469a-97eb-ef5effb6d888
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GemmaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/codegemma-7b-/tmp/c42c79a730cf3f73_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2675458f-3d2d-4c3e-b7c3-94569cdd95ed.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: bigscience/bloomz-560m
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - a6a22929b7211ec8_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/a6a22929b7211ec8_train_data.json
+  type:
+    field_input: privacy_mask
+    field_instruction: masked_text
+    field_output: unmasked_text
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/275c513f-c7ea-4c16-b6eb-7e9453a6a0ac
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: BloomTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: bigscience/bloomz-560m-/tmp/a6a22929b7211ec8_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

284c3982-7bc3-4e42-a78c-849f03798c5f.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: unsloth/Qwen2-7B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 3e306f9221b79797_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/3e306f9221b79797_train_data.json
+  type:
+    field_input: dialogue
+    field_instruction: rendered_input
+    field_output: summary
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/f3c9c6cc-5806-45b4-aab9-d03de6022b3a
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Qwen2-7B-Instruct-/workspace/input_data/3e306f9221b79797_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

29594d3f-2fa3-437a-80c0-be13835d5ddd.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - a720a9d4bd31efdf_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/a720a9d4bd31efdf_train_data.json
+  type:
+    field_input: context
+    field_instruction: question
+    field_output: final_decision
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/71efd339-65b7-418d-8796-5d4030d63c4a
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: rayonlabs/0a05f4a1-93a3-48f9-8aaa-6cab9c9e3762-/tmp/a720a9d4bd31efdf_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

29e89a2c-6136-48b6-88bc-a0066652be7d.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: JackFram/llama-68m
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - ff3a521d02fa72b2_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/ff3a521d02fa72b2_train_data.json
+  type:
+    field_instruction: context
+    field_output: question
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/4ada8092-cc1e-445c-9260-a580ef2586ae
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: JackFram/llama-68m-/workspace/input_data/ff3a521d02fa72b2_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2b93d38c-cfcf-4c91-bd74-d78b57d6e88c.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: Qwen/Qwen2-0.5B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 4c9e3de49f6146d7_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/4c9e3de49f6146d7_train_data.json
+  type:
+    field_instruction: instruction
+    field_output: solution
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/92a2f682-b071-4b7f-b1ad-9d92ad248d58
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2-0.5B-Instruct-/workspace/input_data/4c9e3de49f6146d7_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2cc30bfb-2df3-4b31-b1fb-e29900be6958.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: NousResearch/Llama-3.2-1B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - f51beb4c568b9128_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/f51beb4c568b9128_train_data.json
+  type:
+    field_input: keywords
+    field_instruction: idea
+    field_output: full_response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/0c2649cc-2fe7-4e88-b672-6da1fee4001f
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/f51beb4c568b9128_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2db36e53-9fad-4e67-b969-b3cc09303391.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: NousResearch/Llama-3.2-1B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - bb5c3bd8ee309eb0_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/bb5c3bd8ee309eb0_train_data.json
+  type:
+    field_input: system_prompt
+    field_instruction: question
+    field_output: response
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/60e7a811-5939-479a-aa70-b97de53f693d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Llama-3.2-1B-/tmp/bb5c3bd8ee309eb0_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: katuni4ka/tiny-random-qwen1.5-moe
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 95544452e61c7393_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/95544452e61c7393_train_data.json
+  type:
+    field_input: input
+    field_instruction: instruction
+    field_output: output
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/e61e89f0-854a-4922-8d25-dae435e91af0
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: katuni4ka/tiny-random-qwen1.5-moe-/workspace/input_data/95544452e61c7393_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

2fe0c844-e98c-476d-b9a0-1a41beb91022.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Qwen2.5-Math-1.5B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - d16d347b651ede3e_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/d16d347b651ede3e_train_data.json
+  type:
+    field_instruction: aspect_list
+    field_output: caption_summary
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/d2eafa75-a7f1-408a-a817-38be914edc2d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Qwen2.5-Math-1.5B-Instruct-/workspace/input_data/d16d347b651ede3e_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

30e923d1-5b61-4748-ad75-b5645e8a66f9.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 8cea1b501202bc61_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/8cea1b501202bc61_train_data.json
+  type:
+    field_instruction: principle
+    field_output: instruction
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/4797a41f-d00c-44df-82e0-f23102492c0b
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|eot_id|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct-/tmp/8cea1b501202bc61_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

356b1304-361f-49fb-bc60-43e6188679c1.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: scb10x/llama-3-typhoon-v1.5-8b-instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 3d40e8d1c58c211e_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/3d40e8d1c58c211e_train_data.json
+  type:
+    field_input: abstract
+    field_instruction: title
+    field_output: target
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/105663aa-ca91-474c-9030-957c22705518
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: scb10x/llama-3-typhoon-v1.5-8b-instruct-/tmp/3d40e8d1c58c211e_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

36a43860-f8fa-4c32-afb5-c665be741dc4.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: NousResearch/Yarn-Solar-10b-32k
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 7bb5c8c129066fca_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/7bb5c8c129066fca_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: chosen
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/d02ea0f4-0dee-4d3c-9f12-743ce16384f1
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 1
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Yarn-Solar-10b-32k-/tmp/7bb5c8c129066fca_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

3797da9e-eaaf-4f36-ac37-50d8b1c8015a.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: furiosa-ai/mlperf-gpt-j-6b
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 80b3f2b5f3ce3209_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/80b3f2b5f3ce3209_train_data.json
+  type:
+    field_input: headline_a
+    field_instruction: rendered_input
+    field_output: headline_b
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/5e183a09-3809-4b42-97f6-0e567d3b687b
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 3
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GPT2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: furiosa-ai/mlperf-gpt-j-6b-/tmp/80b3f2b5f3ce3209_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

393d9fe7-0c17-4f36-b481-1a50ecf87c09.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/SmolLM-360M
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - a84f26482cb377ef_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/a84f26482cb377ef_train_data.json
+  type:
+    field_instruction: related_work
+    field_output: abstract
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/74db94dc-a2aa-4bd1-842c-ee6a65969df2
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GPT2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/SmolLM-360M-/workspace/input_data/a84f26482cb377ef_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

39a09fca-d7db-49b6-9d31-760f252e1a05.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - ae60825aae6334e4_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/ae60825aae6334e4_train_data.json
+  type:
+    field_instruction: instruction
+    field_output: output
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/8e5e035b-ad91-4c67-a2c7-1a524b7cc337
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GemmaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2-/tmp/ae60825aae6334e4_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

3b1bd1a9-4f82-45c0-b25f-14c4857d43f1.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: Qwen/Qwen2.5-1.5B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - f14707e620deedc0_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/f14707e620deedc0_train_data.json
+  type:
+    field_input: problem
+    field_instruction: prompt
+    field_output: solution
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/1d2ad8c0-863e-4389-8dcc-4c05af9b0cfd
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 3
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/tmp/f14707e620deedc0_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

3c645e30-be80-48c3-9eda-ca9a31c5ac1d.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: NousResearch/Meta-Llama-3-8B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 932456af08c12528_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/932456af08c12528_train_data.json
+  type:
+    field_input: genres
+    field_instruction: primaryTitle
+    field_output: text
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/66a854ff-b78c-45c7-abb2-367e9ad0a406
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Meta-Llama-3-8B-/tmp/932456af08c12528_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

3d940075-c928-42d9-8a10-07211af2fe5d.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/codegemma-7b-it
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - a4ca6efb0404ddfb_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/a4ca6efb0404ddfb_train_data.json
+  type:
+    field_instruction: references
+    field_output: prompt
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/0c0e5d3f-1349-46d3-9234-4485480fad62
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: GemmaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/codegemma-7b-it-/workspace/input_data/a4ca6efb0404ddfb_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

40f27435-f59d-488f-b2d6-01e356d79c48.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: Qwen/Qwen2-1.5B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - df925134bb2c32b8_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/df925134bb2c32b8_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: amoral
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

419d7d82-9c5d-40a8-9096-ea94ed8503b4.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: Qwen/Qwen2.5-Math-7B-Instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - f58008447a01a2e1_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/f58008447a01a2e1_train_data.json
+  type:
+    field_instruction: question
+    field_output: answer
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/0b0d8bdd-1df1-4e38-93cb-eb2f50e3e64a
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 2
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: Qwen/Qwen2.5-Math-7B-Instruct-/tmp/f58008447a01a2e1_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

424a19bd-9a6f-4171-a205-745ee0bc1a03.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Mistral-Nemo-Instruct-2407
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 2ad9a6f08f511a85_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/2ad9a6f08f511a85_train_data.json
+  type:
+    field_instruction: question
+    field_output: reponses
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/2ba978bd-1973-4dbe-93d9-13ea0d52bfde
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Mistral-Nemo-Instruct-2407-/tmp/2ad9a6f08f511a85_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

427d02be-6008-4556-9a5e-9c7cb7503058.yml ADDED Viewed

	@@ -0,0 +1,51 @@

+base_model: unsloth/Phi-3.5-mini-instruct
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 7e5b54272524b996_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/7e5b54272524b996_train_data.json
+  type:
+    field_input: input
+    field_instruction: instruction
+    field_output: output
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

428f88e9-4d8c-411a-a2d9-f723f8dc8229.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: dltjdgh0928/test_instruction
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 7e1c4c23d4464ec4_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/7e1c4c23d4464ec4_train_data.json
+  type:
+    field_instruction: line
+    field_output: values
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/f921eed7-e3a9-4535-800a-6dca03a54aaa
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 19
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: LlamaTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: dltjdgh0928/test_instruction-/tmp/7e1c4c23d4464ec4_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

44d8aa1a-ecd0-42f5-80d3-60ff76d8044e.yml ADDED Viewed

	@@ -0,0 +1,53 @@

+base_model: NousResearch/Llama-3.2-1B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - 05366f42f2c8edcd_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/05366f42f2c8edcd_train_data.json
+  type:
+    field_input: schema
+    field_instruction: question
+    field_output: output
+    format: '{instruction} {input}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: willtensora/5a684b11-35f0-45eb-8ff7-2641074d853f
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+max_steps: 2500
+micro_batch_size: 4
+model_type: AutoModelForCausalLM
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|end_of_text|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: NousResearch/Llama-3.2-1B-/workspace/input_data/05366f42f2c8edcd_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

4524a254-d619-44de-addb-80d1a56f0224.yml ADDED Viewed

	@@ -0,0 +1,52 @@

+base_model: katuni4ka/tiny-random-falcon-40b
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - b30c17b6a3700eb0_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/b30c17b6a3700eb0_train_data.json
+  type:
+    field_instruction: hieroglyphs
+    field_output: translation
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/f64de308-5d39-4088-bb11-f9b890e56369
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 0
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+special_tokens:
+  pad_token: <|endoftext|>
+tokenizer_type: PreTrainedTokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: katuni4ka/tiny-random-falcon-40b-/tmp/b30c17b6a3700eb0_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true

4692c3b1-0351-4533-948d-ace8c76ceb1f.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+base_model: unsloth/Qwen2.5-0.5B
+batch_size: 32
+bf16: true
+chat_template: tokenizer_default_fallback_alpaca
+datasets:
+- data_files:
+  - e79aa413a56fb417_train_data.json
+  ds_type: json
+  format: custom
+  path: /workspace/input_data/e79aa413a56fb417_train_data.json
+  type:
+    field_instruction: prompt
+    field_output: chosen
+    format: '{instruction}'
+    no_input_format: '{instruction}'
+    system_format: '{system}'
+    system_prompt: ''
+eval_steps: 20
+flash_attention: true
+gpu_memory_limit: 80GiB
+gradient_checkpointing: true
+group_by_length: true
+hub_model_id: SystemAdmin123/c9c706d7-4304-4009-a2b2-f6bd643f09f1
+hub_strategy: checkpoint
+learning_rate: 0.0002
+logging_steps: 10
+lr_scheduler: cosine
+micro_batch_size: 3
+model_type: AutoModelForCausalLM
+num_epochs: 10
+optimizer: adamw_bnb_8bit
+output_dir: /workspace/axolotl/configs
+pad_to_sequence_len: true
+resize_token_embeddings_to_32x: false
+sample_packing: false
+save_steps: 40
+save_total_limit: 1
+sequence_len: 2048
+tokenizer_type: Qwen2TokenizerFast
+train_on_inputs: false
+trust_remote_code: true
+val_set_size: 0.1
+wandb_entity: ''
+wandb_mode: online
+wandb_name: unsloth/Qwen2.5-0.5B-/tmp/e79aa413a56fb417_train_data.json
+wandb_project: Gradients-On-Demand
+wandb_run: your_name
+wandb_runid: default
+warmup_ratio: 0.05
+xformers_attention: true