---
# Training-run configuration.
#
# The `!!python/object:` tags below are PyYAML language-specific tags: a safe
# loader (`yaml.safe_load`) rejects them, so this file can only be read by a
# loader that is allowed to construct Python objects. Load it only from a
# trusted source.
#
# NOTE(review): the block structure was reconstructed from a single-line form
# of this document. Top-level placement of `eod_token_id` and
# `max_position_embeddings` follows from the alphabetical key order; confirm
# that `optim_name`, `scheduler_name` and `weight_decay` are fields of
# `optim_config` rather than entries of the preceding `*_kwargs` mappings.

dataloader_config: !!python/object:src.datamodule.DataloaderConfig
  batch_size: 16
  drop_last: false
  eval_batch_size: 128
  multiprocessing_context: null
  num_workers: 12
  persistent_workers: false
  pin_memory: true
  prefetch_factor: 2
  shuffle: true

eod_token_id: 0

max_position_embeddings: 2048

# NOTE(review): the class name in this tag is spelled `OptimCofig`. The tag
# must match the Python class name exactly, so it is kept as-is; confirm
# against `src.module` before renaming anything.
optim_config: !!python/object:src.module.OptimCofig
  keller_kwargs: {}
  lr: 0.0006
  num_warmup_steps: 2000
  optim_kwargs:
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    fused: true
  optim_name: adamw
  scheduler_kwargs:
    min_lr_ratio: 0.01
    num_decay_steps: 2000
    num_stable_steps: 46000
  scheduler_name: warmup_stable_decay
  weight_decay: 0.1

# Absolute paths under one user's home directory; they need adjusting when the
# run is reproduced on another machine or account.
train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation