File size: 2,343 Bytes
29792f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# @package __global__
# The line above is Hydra's package header; keep it as the first line of the
# file. `__global__` places the keys of this file at the root of the config.

# Hydra defaults list: the configs composed together with this file.
# `_self_` marks the position of this file's own keys in that composition.
defaults:
  - /solver/default
  - /conditioner: none
  - _self_
  - /solver/musicgen/evaluation: none
  - override /dset: audio/default
# Autocast switch and the dtype used when it is on.
# NOTE(review): presumably torch mixed-precision autocast - confirm in the solver.
autocast: true
autocast_dtype: float16
# Solver selected by this config.
solver: musicgen
# `???` is OmegaConf's marker for a mandatory value: the three keys below have
# no default here and must be provided by another config or an override.
sample_rate: ???
channels: ???
compression_model_checkpoint: ???
# Token handling options.
tokens:
  # NOTE(review): presumably selects a special token for padded positions;
  # confirm against the solver code.
  padding_with_special_token: false
# Cache options. No path is set by default and writing is off.
# NOTE(review): what gets cached is not visible from this file - confirm.
cache:
  path: null  # explicit null instead of a bare empty value (same parsed value)
  write: false
  # Shard index and total shard count used when writing.
  write_shard: 0
  write_num_shards: 1
# Data loading options. The scalar keys apply to the dataset as a whole; the
# `train` / `valid` / `generate` sub-sections set the sample count per split.
dataset:
  batch_size: 128
  num_workers: 10
  segment_duration: 30  # NOTE(review): presumably seconds - confirm
  min_segment_ratio: 0.8  # lower values such as 0.5 result in generations with a lot of silence.
  return_info: true
  train:
    num_samples: 1000000  # needs an arbitrarily large number here for AudioDataset
  valid:
    num_samples: 10000
  generate:
    num_samples: 50
# Per-metric settings. Every entry has the same shape: `use_gt`, a `model`
# name, and a sibling mapping named after that model holding its options.
metrics:
  fad:
    use_gt: false
    model: tf
    tf:
      bin: null  # path to local frechet_audio_distance code
      model_path: //reference/fad/vggish_model.ckpt
  kld:
    use_gt: false
    model: passt
    passt:
      pretrained_length: 20
  text_consistency:
    use_gt: false
    model: clap
    clap:
      model_path: //reference/clap/music_audioset_epoch_15_esc_90.14.pt
      model_arch: 'HTSAT-base'
      enable_fusion: false
  chroma_cosine:
    use_gt: false
    model: chroma_base
    chroma_base:
      # Interpolated from the mandatory top-level `sample_rate`.
      sample_rate: ${sample_rate}
      n_chroma: 12
      radix2_exp: 14
      argmax: true
# Sample generation stage. This is the top-level `generate` section; it is
# distinct from `dataset.generate`, which only sets the sample count.
generate:
  every: 25  # NOTE(review): presumably a period in epochs - confirm
  num_workers: 5
  path: samples
  # Options for the audio files that are written out.
  audio:
    format: wav
    strategy: loudness
    sample_rate: ${sample_rate}  # interpolated from the top-level `sample_rate`
    loudness_headroom_db: 14
  # Options for generating from the language model.
  lm:
    prompted_samples: true
    unprompted_samples: true
    gen_gt_samples: false
    prompt_duration: null  # if not set, will use dataset.generate.segment_duration / 4
    gen_duration: null  # if not set, will use dataset.generate.segment_duration
    remove_prompts: false
    # generation params
    use_sampling: false
    temp: 1.0
    top_k: 0
    top_p: 0.0
# Evaluation stage.
evaluate:
  every: 25  # NOTE(review): presumably a period in epochs - confirm
  num_workers: 5
  # One switch per metric, all off by default. Apart from `base`, the names
  # match the entries of the top-level `metrics` section.
  metrics:
    base: false
    fad: false
    kld: false
    text_consistency: false
    chroma_cosine: false
# Checkpoint saving and retention options.
checkpoint:
  save_last: true
  save_every: 50
  keep_last: 10
  keep_every_states: null
# Optimization options.
optim:
  epochs: 200
  updates_per_epoch: 2000
  # Written with an explicit mantissa dot so that YAML 1.1 loaders such as
  # PyYAML also read it as a float; the value is unchanged (0.0001).
  lr: 1.0e-4
  optimizer: adamw
  max_norm: 1.0
  eager_sync: true
  # NOTE(review): presumably the hyper-parameters for the Adam-family
  # optimizer named in `optimizer` - confirm.
  adam:
    betas: [0.9, 0.95]
    weight_decay: 0.1
    eps: 1.0e-8  # same value as before, explicit dot for YAML 1.1 loaders
# Learning-rate schedule. `null` means no scheduler is configured here.
schedule:
  lr_scheduler: null
|