mtasic85 committed on
Commit
047b831
·
1 Parent(s): ab16584

pretrain core

Browse files
scripts/pretrain_core_model_0.yaml CHANGED
@@ -10,7 +10,7 @@ model_config:
10
  vocab_size: 131072
11
  padded_vocab_size: 131072
12
  n_layer: 32
13
- n_head: 32
14
  n_embd: 512
15
  n_query_groups: 8
16
  rotary_percentage: 1.0
 
10
  vocab_size: 131072
11
  padded_vocab_size: 131072
12
  n_layer: 32
13
+ n_head: 16
14
  n_embd: 512
15
  n_query_groups: 8
16
  rotary_percentage: 1.0
scripts/requirements.in CHANGED
@@ -14,7 +14,7 @@ mergekit @ git+https://github.com/arcee-ai/mergekit.git
14
  torchao
15
  # bitsandbytes
16
  # grokadamw
17
- # sophia-opt
18
  dolphinflow @ git+https://github.com/cognitivecomputations/dolphinflow-optimizer.git
19
  # unsloth
20
  lm_eval[ifeval,math]
 
14
  torchao
15
  # bitsandbytes
16
  # grokadamw
17
+ sophia-opt
18
  dolphinflow @ git+https://github.com/cognitivecomputations/dolphinflow-optimizer.git
19
  # unsloth
20
  lm_eval[ifeval,math]