SystemAdmin123 committed
Commit ae267ef · verified · 1 Parent(s): 7259c5b

Training in progress, step 200

Files changed (3)
  1. axolotl_config.yaml +7 -7
  2. model.safetensors +1 -1
  3. training_args.bin +1 -1
axolotl_config.yaml CHANGED
@@ -1,5 +1,5 @@
 base_model: unsloth/SmolLM-360M
-batch_size: 32
+batch_size: 92
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
@@ -17,23 +17,23 @@ device_map: auto
 eval_sample_packing: false
 eval_steps: 200
 flash_attention: true
-gpu_memory_limit: 80GiB
+gradient_checkpointing: true
 group_by_length: true
 hub_model_id: SystemAdmin123/SmolLM-360M
 hub_strategy: checkpoint
 learning_rate: 0.0002
 logging_steps: 10
 lr_scheduler: cosine
-max_steps: 2500
-micro_batch_size: 4
+max_steps: 10000
+micro_batch_size: 23
 model_type: AutoModelForCausalLM
 num_epochs: 100
 optimizer: adamw_bnb_8bit
-output_dir: /root/.sn56/axolotl/outputs/SmolLM-360M
+output_dir: /root/.sn56/axolotl/tmp/SmolLM-360M
 pad_to_sequence_len: true
 resize_token_embeddings_to_32x: false
-sample_packing: false
-save_steps: 400
+sample_packing: true
+save_steps: 200
 save_total_limit: 1
 sequence_len: 2048
 tokenizer_type: GPT2TokenizerFast
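
The updated config drops the explicit gpu_memory_limit, enables gradient_checkpointing and sample_packing, raises batch_size to 92 with micro_batch_size 23, and extends training to max_steps 10000 with checkpoints every 200 steps. A minimal sketch (not part of the commit), assuming axolotl derives gradient accumulation as batch_size divided by micro_batch_size times the world size; world_size below is a hypothetical example value:

import yaml  # requires PyYAML

# Load the updated config and derive the implied gradient accumulation,
# assuming batch_size = micro_batch_size * gradient_accumulation_steps * world_size.
with open("axolotl_config.yaml") as f:
    cfg = yaml.safe_load(f)

world_size = 1  # assumed single-process run, for illustration only
grad_accum = cfg["batch_size"] // (cfg["micro_batch_size"] * world_size)
print(f"batch_size={cfg['batch_size']} micro_batch_size={cfg['micro_batch_size']} "
      f"implied gradient_accumulation_steps={grad_accum}")  # 92 // 23 = 4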
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfc7459b8070f26a9a864b85343d1738e3d2dcd6020a04f5fb8b9aca1951ffd2
+oid sha256:6ca635966f2128b90695cdcf1b450ff9388c9812f95f690192973e5b7eefd3c9
 size 723674912
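
The weights file is stored as a Git LFS pointer, so the diff only swaps the sha256 oid while the size stays at 723674912 bytes. A hedged sketch for checking a locally downloaded model.safetensors against the oid recorded above (the local path is an assumption about the checkout layout):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so the ~700 MB weights are never held in memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "6ca635966f2128b90695cdcf1b450ff9388c9812f95f690192973e5b7eefd3c9"
print(sha256_of("model.safetensors") == expected)  # True if the download matches the pointer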
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a1d092fd3f29167bdacdba8b2489c3b0abb5e1e28bd152259e356428fc4a59e
+oid sha256:3156bde561d7a483929e0f1d8c097a973dfeb26f4690b823508131f70e6df615
 size 6840
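
training_args.bin is the serialized training-arguments object the Hugging Face Trainer writes next to each checkpoint; it changes here because the config update alters hyperparameters such as save_steps and max_steps. A small sketch for inspecting a downloaded copy, assuming it is a torch-pickled TrainingArguments object as Trainer normally saves it:

import torch

# weights_only=False is needed on newer torch versions to unpickle the full object.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)                              # e.g. TrainingArguments
print(args.learning_rate, args.save_steps, args.max_steps)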