# Source: Hugging Face file viewer snapshot (metadata preserved as comments
# so this file parses as valid YAML):
#   pszemraj's picture
#   Upload folder using huggingface_hub
#   d85a90d verified
#   raw / history blame
#   879 Bytes
---
# nanoT5-style T5 pretraining configuration.
# NOTE(review): the extracted copy had all indentation stripped; nesting below
# is reconstructed from the valueless section keys (model:, overwrite:, ...).
# Verify against the original file / the consumer's config schema.
mode: pt  # "pt" = pretraining run (vs. fine-tuning) — TODO confirm against trainer
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 80085  # RNG seed for reproducibility

model:
  klass: local_t5
  name: pszemraj/tFINE-base-65kBPE-FLAN
  # Config fields overridden on the loaded model config:
  overwrite:
    dropout_rate: 0.0
  # Extra fields added to the model config:
  add_config:
    is_bf16: false
  checkpoint_path: ''  # empty string = no checkpoint to resume from
  random_init: true  # train from randomly initialized weights, not pretrained
  compile: true  # presumably torch.compile — verify against trainer

tokenizer:
  name: BEE-spoke-data/slimpajama_tok-48128-BPE-forT5

data:
  input_length: 1024  # tokens per input sequence
  mlm_probability: 0.15  # corruption rate for span masking — TODO confirm semantics
  mean_noise_span_length: 3.0
  num_workers: 8  # dataloader worker processes

optim:
  name: adamwscale
  base_lr: 0.008
  batch_size: 120
  total_steps: 80000
  epochs: -1  # -1 presumably means "step-bounded, ignore epochs" — verify
  warmup_steps: 10000
  lr_scheduler: cosine
  weight_decay: 0.0001
  grad_clip: 1.0
  grad_acc: 24  # gradient accumulation steps
  final_cosine: 1.0e-05  # final LR of the cosine decay

eval:
  every_steps: 100000  # > total_steps, so periodic eval effectively never fires
  steps: 500

checkpoint:
  every_steps: 5000

logging:
  neptune: false  # Neptune.ai logging disabled; creds below intentionally null
  neptune_creds:
    project: null
    api_token: null
  tags: ''
  every_steps: 50  # log interval in optimizer steps
  grad_l2: true  # log gradient L2 norm
  weights_l2: true  # log weight L2 norm