Spaces:

chenxie95
/

MeanAudio

Running on Zero

App Files Files Community

MeanAudio / config /base_config.yaml

junxiliu

add needed model with proper LFS tracking

3a1da90 4 months ago

raw

history blame

1.65 kB

	defaults:
	- data: t5_clap # chenge here to load different data in testing (data.AudioCaps_test)
	- override hydra/job_logging: custom-simplest
	- _self_

	hydra:
	run:
	dir: ./exps/${exp_id}
	output_subdir: ${now:%Y-%m-%d_%H-%M-%S}-hydra

	enable_email: False

	## model
	model: meanaudio_mf
	text_encoder_name: t5_clap # [t5, clip, t5_clap, t5_clap_cat]: change here for different feature utils (only for runner-FeatureUtils/infer, not used for using pre-computed dataset)
	concat_text_fc: False

	exp_id: default
	debug: False
	cudnn_benchmark: True
	compile: False # set compile to false by default
	amp: True
	weights: null
	# weights: null

	checkpoint: null

	seed: 14159265
	num_workers: 10 # per-GPU
	pin_memory: False # set to True if your system can handle it, i.e., have enough memory

	# NOTE: This DOSE NOT affect the model during inference in any way
	# they are just for the dataloader to fill in the missing data in multi-modal loading
	# to change the sequence length for the model, see networks.py
	data_dim:
	text_seq_len: 77
	text_dim: 1024
	text_c_dim: 512 # 1024 for pooled T5, 512 for CLAP

	# ema configuration
	ema:
	enable: True
	sigma_rels: [0.05, 0.1]
	update_every: 1
	checkpoint_every: 10_000
	checkpoint_folder: ${hydra:run.dir}/ema_ckpts
	default_output_sigma: 0.05


	# sampling, only for flow matching
	sampling:
	mean: 0.0
	scale: 1.0
	min_sigma: 0.0
	method: euler
	num_steps: 25

	# classifier-free guidance
	null_condition_probability: 0.1
	cfg_strength: 1

	# checkpoint paths to external modules
	vae_16k_ckpt: ./weights/v1-16.pth
	vae_44k_ckpt: ./weights/v1-44.pth
	bigvgan_vocoder_ckpt: ./weights/best_netG.pt