Spaces:
Sleeping
Sleeping
| { | |
| "base_config": "config/base.json", | |
| "model_type": "AutoencoderKL", | |
| "task_type": "tta", | |
| "dataset": [ | |
| "AudioCaps" | |
| ], | |
| "preprocess": { | |
| // features used for model training (per-feature on/off flags) | |
| "use_spkid": false, | |
| "use_uv": false, | |
| "use_frame_pitch": false, | |
| "use_phone_pitch": false, | |
| "use_frame_energy": false, | |
| "use_phone_energy": false, | |
| "use_mel": false, | |
| "use_audio": false, | |
| "use_label": false, | |
| "use_one_hot": false | |
| }, | |
| // model: "autoencoderkl" network settings and "loss" settings | |
| "model": { | |
| "autoencoderkl": { | |
| "ch": 128, | |
| "ch_mult": [ | |
| 1, | |
| 1, | |
| 2, | |
| 2, | |
| 4 | |
| ], | |
| "num_res_blocks": 2, | |
| "in_channels": 1, | |
| "z_channels": 4, | |
| "out_ch": 1, | |
| "double_z": true | |
| }, | |
| "loss": { | |
| "kl_weight": 1e-8, | |
| "disc_weight": 0.5, | |
| "disc_factor": 1.0, | |
| "logvar_init": 0.0, | |
| "min_adapt_d_weight": 0.0, | |
| "max_adapt_d_weight": 10.0, | |
| "disc_start": 50001, | |
| "disc_in_channels": 1, | |
| "disc_num_layers": 3, | |
| "use_actnorm": false | |
| } | |
| }, | |
| // train: "lronPlateau" learning-rate schedule settings and "adam" optimizer settings | |
| "train": { | |
| "lronPlateau": { | |
| "factor": 0.9, | |
| "patience": 100, | |
| "min_lr": 4.0e-5, | |
| "verbose": true | |
| }, | |
| "adam": { | |
| "lr": 4.0e-4, | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "weight_decay": 1.0e-2, | |
| "eps": 1.0e-8 | |
| } | |
| } | |
| } |