mtasic85 committed
Commit 8a2ebd6 · 1 Parent(s): 7d2cb3f

pretrain core
config-0.json CHANGED
@@ -8,7 +8,7 @@
   "eos_token_id": 1,
   "head_dim": 256,
   "hidden_act": "silu",
-  "hidden_size": 512,
+  "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 2048,
   "max_position_embeddings": 131072,
scripts/pretrain_core_model_0.yaml CHANGED
@@ -11,14 +11,14 @@ model_config:
   padded_vocab_size: 131072
   n_layer: 32
   n_head: 16
-  n_embd: 512
+  n_embd: 768
   n_query_groups: 4
   rotary_percentage: 1.0
   parallel_residual: False
   bias: False
   norm_class_name: "RMSNorm"
   mlp_class_name: "LLaMAMLP"
-  intermediate_size: 2048 # n_embd * 4
+  intermediate_size: 2048 # n_embd * 2.666
   norm_eps: 1e-5
   rope_base: 4300 # https://arxiv.org/pdf/2405.14591
   head_size: 256 # n_embd / n_head
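Since the same width change lands in both files, a small sketch to confirm the two configs stay in sync, assuming PyYAML is available and the model settings live under the model_config key, as the hunk header indicates:

```python
import json
import yaml  # PyYAML

with open("config-0.json") as f:
    hf_cfg = json.load(f)

with open("scripts/pretrain_core_model_0.yaml") as f:
    lit_cfg = yaml.safe_load(f)["model_config"]

# Both files must agree on the new width and the unchanged MLP size.
assert hf_cfg["hidden_size"] == lit_cfg["n_embd"] == 768
assert hf_cfg["intermediate_size"] == lit_cfg["intermediate_size"] == 2048

# head_dim / head_size is pinned at 256 in both configs. Note this is an
# explicit setting, not n_embd / n_head (which would give 768 / 16 = 48).
assert hf_cfg["head_dim"] == lit_cfg["head_size"] == 256
```

If head_size were actually derived as n_embd / n_head it would be 48, so the YAML comment is best read as a reminder of the usual default rather than the formula applied here.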