mtasic85 committed
Commit b34d4e8 · 1 Parent(s): 7206717
Files changed (1)
  1. scripts/pretrain_base_model_0.yaml +4 -4
scripts/pretrain_base_model_0.yaml CHANGED
@@ -1,11 +1,11 @@
 # The name of the model to pretrain. Choose from names in ``litgpt.config``. Mutually exclusive with
 # ``model_config``. (type: Optional[str], default: null)
-model_name: 'tangled-alpha-0.12-core'
+model_name: 'tangled-alpha-0.12-base'
 
 # A ``litgpt.Config`` object to define the model architecture. Mutually exclusive with
 # ``model_config``. (type: Optional[Config], default: null)
 model_config:
-  name: 'tangled-alpha-0.12-core'
+  name: 'tangled-alpha-0.12-base'
   block_size: 131072
   vocab_size: 65536
   padded_vocab_size: 65536
@@ -25,7 +25,7 @@ model_config:
 
 # Directory in which to save checkpoints and logs. If running in a Lightning Studio Job, look for it in
 # /teamspace/jobs/<job-name>/share. (type: <class 'Path'>, default: out/pretrain)
-out_dir: "../out/pretrain-core-0/"
+out_dir: "../out/pretrain-base-0/"
 
 # The precision to use for pretraining. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
 # precision: bf16-mixed
@@ -61,7 +61,7 @@ train:
   global_batch_size: 512
 
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 1
+  micro_batch_size: 2
 
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 2000
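
Net effect of this commit: the config is renamed from the 'core' to the 'base' stage (model_name, model_config.name, out_dir), and micro_batch_size is doubled from 1 to 2. Because global_batch_size stays at 512, the effective batch is unchanged; litgpt compensates by accumulating fewer micro-batches per optimizer step. A minimal sketch of that arithmetic, assuming plain data-parallel training (the device count is an assumption, not part of this config, and this is not litgpt's actual code):

def gradient_accumulation_iters(global_batch_size: int,
                                micro_batch_size: int,
                                devices: int) -> int:
    """Micro-batches each rank accumulates before one optimizer step."""
    per_rank = global_batch_size // devices  # samples per rank per optimizer step
    assert per_rank % micro_batch_size == 0, \
        "global_batch_size must be divisible by devices * micro_batch_size"
    return per_rank // micro_batch_size

# Hypothetical 8-GPU node:
print(gradient_accumulation_iters(512, 1, 8))  # before this commit -> 64
print(gradient_accumulation_iters(512, 2, 8))  # after this commit  -> 32

So doubling micro_batch_size halves the forward/backward passes needed per optimizer step, at the cost of more activation memory per rank; presumably the motivation here, given the identical global batch.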