model_name: tangled-alpha-0.10-core
model_config:
  name: tangled-alpha-0.10-core
  hf_config: {}
  block_size: 131072
  n_layer: 32
  n_embd: 768
  vocab_size: 131072
  padding_multiple: 512
  padded_vocab_size: 131072
  norm_class_name: RMSNorm
  norm_eps: 1.0e-05
  norm_qk: false
  post_attention_norm: false
  post_mlp_norm: false
  parallel_residual: false
  shared_attention_norm: false
  n_head: 12
  head_size: 64
  n_query_groups: 4
  attn_bias: false
  rope_base: 27000
  rotary_percentage: 1.0
  rope_condense_ratio: 1
  intermediate_size: 2048
  bias: false
  mlp_class_name: LLaMAMLP
  gelu_approximate: none
  n_expert: 0
  n_expert_per_token: 0
  scale_embeddings: false
  lm_head_bias: false
out_dir: ../out/pretrain-core-2
precision: bf16-true
initial_checkpoint_dir: ../out/pretrain-core-1/checkpoint
data:
  class_path: litgpt.data.LitData
  init_args:
    data_path: ../core-data-2-2049-4097-4097-4000/
    seed: 42
    num_workers: 32
train:
  save_interval: 25
  log_interval: 1
  global_batch_size: 512
  micro_batch_size: 2
  lr_warmup_steps: 0
  max_tokens: 1536801088
  max_seq_length: 4097
  tie_embeddings: false
  max_norm: 1.0
  min_lr: 1.0e-05
eval:
  interval: 25
  max_iters: 100
  initial_validation: true
  final_validation: true
  evaluate_example: first
optimizer:
  class_path: sophia_opt.SophiaG
  init_args:
    lr: 1.0e-05
    betas:
    - 0.965
    - 0.99
    rho: 0.04
    weight_decay: 0.1
devices: auto
num_nodes: 1
tokenizer_dir: ../tokenizer
logger_name: wandb
seed: 23
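
# Note (added context, not part of the original run config): litgpt derives the
# gradient-accumulation steps from the batch settings above as
#   global_batch_size / (devices * num_nodes * micro_batch_size),
# e.g. 512 / (8 * 1 * 2) = 32 accumulation steps on a hypothetical single 8-GPU node;
# with devices set to auto the actual divisor depends on the hardware detected at launch.
# A config of this form is typically passed to the litgpt CLI, e.g.
#   litgpt pretrain --config pretrain-core-2.yaml
# where the file name shown here is only an assumption for illustration.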