pretrain
scripts/pretrain_base_model_0.yaml (CHANGED)
@@ -1,11 +1,11 @@
 # The name of the model to pretrain. Choose from names in ``litgpt.config``. Mutually exclusive with
 # ``model_config``. (type: Optional[str], default: null)
-model_name: 'tangled-alpha-0.12-
+model_name: 'tangled-alpha-0.12-base'
 
 # A ``litgpt.Config`` object to define the model architecture. Mutually exclusive with
 # ``model_name``. (type: Optional[Config], default: null)
 model_config:
-  name: 'tangled-alpha-0.12-
+  name: 'tangled-alpha-0.12-base'
   block_size: 131072
   vocab_size: 65536
   padded_vocab_size: 65536
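The model is defined inline here rather than looked up by name, since 'tangled-alpha-0.12-base' is a custom name and not in litgpt's registry. As a rough sketch (not code from this commit): litgpt parses the model_config block above into a ``litgpt.Config`` dataclass, so constructing one directly with the same fields is equivalent for the values shown.

# Sketch only; in practice litgpt builds this object from the YAML.
from litgpt import Config

config = Config(
    name="tangled-alpha-0.12-base",  # custom name, not a registry lookup
    block_size=131072,               # maximum context length in tokens
    vocab_size=65536,
    padded_vocab_size=65536,         # already a power of two, so no extra padding
)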
@@ -25,7 +25,7 @@ model_config:
 
 # Directory in which to save checkpoints and logs. If running in a Lightning Studio Job, look for it in
 # /teamspace/jobs/<job-name>/share. (type: <class 'Path'>, default: out/pretrain)
-out_dir: "../out/pretrain-
+out_dir: "../out/pretrain-base-0/"
 
 # The precision to use for pretraining. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
 # precision: bf16-mixed
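The precision line stays commented out, so litgpt's default applies. For reference, a hedged sketch of what the three choices mean when the string is handed to Lightning Fabric, which litgpt uses under the hood (the devices value here is an arbitrary assumption):

# "bf16-mixed": fp32 master weights, bfloat16 compute
# "bf16-true":  weights themselves stored in bfloat16 (halves memory)
# "32-true":    plain fp32 throughout
from lightning.fabric import Fabric

fabric = Fabric(devices=1, precision="bf16-mixed")  # or "bf16-true" / "32-true"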
@@ -61,7 +61,7 @@ train:
   global_batch_size: 512
 
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size:
+  micro_batch_size: 2
 
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 2000
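Setting micro_batch_size to 2 while keeping global_batch_size at 512 means the gap is covered by gradient accumulation. A back-of-envelope sketch of how the numbers interact (the world_size of 8 is an assumption; litgpt derives it from the devices and num_nodes settings):

global_batch_size = 512
micro_batch_size = 2
world_size = 8  # hypothetical: a single 8-GPU node

# Samples each data-parallel rank contributes per optimizer step:
batch_size_per_rank = global_batch_size // world_size        # 64
# Micro-batches accumulated before each optimizer step:
grad_accum_iters = batch_size_per_rank // micro_batch_size   # 32

assert grad_accum_iters * micro_batch_size * world_size == global_batch_size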
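Assuming the upstream litgpt CLI, a config like this is typically launched with ``litgpt pretrain --config scripts/pretrain_base_model_0.yaml``; any value in the file can also be overridden on the command line (e.g. ``--train.micro_batch_size 4``).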