|
|
|
# Output locations for a run.
# `project` is not defined in the visible part of this file; it has to be
# provided from elsewhere (another config or an override) for these to resolve.
paths:
  # Root directory for everything the run writes.
  run_dir: 'results/${project}'
  # Checkpoints live in a subdirectory of the run directory.
  ckpt_dir: '${paths.run_dir}/checkpoints'
|
|
|
|
# Make Hydra's run directory the same directory as `paths.run_dir`.
hydra:
  run:
    dir: '${paths.run_dir}'
|
|
|
|
|
|
# Keyword arguments for the Lightning Trainer named by `_target_`.
trainer:
  _target_: lightning.pytorch.trainer.Trainer

  default_root_dir: '${paths.run_dir}'
  accelerator: gpu
  num_nodes: 1
  devices: auto
  # DDP strategy object, itself built from its own `_target_`.
  strategy:
    _target_: lightning.pytorch.strategies.DDPStrategy
    process_group_backend: nccl

  precision: bf16-mixed

  # Step-based validation: the per-epoch check is set to null and
  # val_check_interval is an integer count of training batches.
  check_val_every_n_epoch: null
  val_check_interval: 5000
  # Written without a digit separator: `100_000` is an integer only under
  # YAML 1.1 rules; YAML 1.2 core-schema parsers read it as a string.
  max_steps: 100000

  # Passed to the Trainer's `benchmark` argument.
  benchmark: true
|
|
|
|
|
|
# Callbacks, one entry per callback, each built from its `_target_`.
callbacks:
  # Step-driven checkpointing: a save is triggered every 5000 training steps
  # and epoch-based saving is left unset (null). The monitored quantity is
  # `step` with mode `max` and save_top_k 5, so the five checkpoints with the
  # highest step value are the ones retained.
  model_checkpoint:
    _target_: lightning.pytorch.callbacks.ModelCheckpoint
    dirpath: '${paths.ckpt_dir}'
    # Zero-padded step number; metric names are not auto-inserted (see below).
    filename: 'step_{step:09d}'
    save_last: false
    save_top_k: 5
    monitor: step
    mode: max
    every_n_epochs: null
    every_n_train_steps: 5000
    auto_insert_metric_name: false

  # Model summary, limited to two levels of module nesting.
  model_summary:
    _target_: lightning.pytorch.callbacks.ModelSummary
    max_depth: 2

  # Learning-rate logging at step granularity; momentum is not logged.
  learning_rate_monitor:
    _target_: lightning.pytorch.callbacks.LearningRateMonitor
    logging_interval: step
    log_momentum: false

  # Project-local callback.
  # NOTE(review): norm_type 2 presumably selects the L2 norm; confirm against
  # fish_speech.callbacks.GradNormMonitor.
  grad_norm_monitor:
    _target_: fish_speech.callbacks.GradNormMonitor
    norm_type: 2
    logging_interval: step
|
|
|
|
|
|
# Loggers, one entry per logger, each built from its `_target_`.
logger:
  # TensorBoard event files go under the run directory.
  tensorboard:
    _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
    save_dir: '${paths.run_dir}/tensorboard/'
    name: null
    log_graph: false
    default_hp_metric: true
    prefix: ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stage switches.
# NOTE(review): presumably read by the training entry point to decide whether
# to run fitting and/or testing; confirm against the code that consumes them.
train: true
test: false
|
|
|