jukebox-5b-lyrics / config.json
ArthurZ's picture
ArthurZ HF staff
replace prime with lyric_enc
1721ce5
raw
history blame
3.25 kB
{
"_name_or_path": "jukebox-5b-lyrics",
"architectures": [
"JukeboxModel"
],
"cond_c_res": [
0,
1,
1
],
"cond_depth": [
3,
16,
16
],
"cond_dilation_cycle": [
null,
8,
8
],
"cond_dilation_growth_rate": [
1,
3,
3
],
"cond_downs_t": [
3,
2,
2
],
"cond_m_conv": 1,
"cond_res_scale": [
null,
true,
false
],
"cond_strides_t": [
2,
2,
2
],
"cond_width": [
128,
1024,
1024
],
"cond_zero_out": false,
"copy_input": false,
"fp16_params": true,
"hop_fraction": [
0.125,
0.5,
0.5
],
"init_std": 0.2,
"lyric_conditioning": [
true,
false,
false
],
"max_duration": 600.0,
"max_nb_genres": 5,
"merged_decoder": [
true,
false,
false
],
"metadata_conditioning": true,
"metadata_dims": [
[
120,
4111
],
[
120,
4111
],
[
120,
4111
]
],
"min_duration": 23.8,
"model_type": "jukebox",
"nb_priors": 3,
"nb_relevant_lyric_tokens": [
512,
0,
0
],
"lyric_enc_attn_dropout": 0.0,
"lyric_enc_attn_order": [
2,
0,
0
],
"lyric_enc_blocks": 32,
"lyric_enc_depth": [
18,
3,
3
],
"lyric_enc_emb_dropout": 0.0,
"lyric_enc_heads": 4,
"lyric_enc_init_scale": [
0.1,
0.4,
0.4
],
"lyric_enc_loss_fraction": [
0.4,
0.0,
0.0
],
"lyric_enc_m_attn": 0.25,
"lyric_enc_m_mlp": 1.0,
"lyric_enc_n_vocab": 80,
"lyric_enc_pos_init": false,
"lyric_enc_res_scale": false,
"lyric_enc_resid_dropout": 0.0,
"lyric_enc_spread": null,
"lyric_enc_width": [
1280,
128,
128
],
"lyric_enc_zero_out": false,
"prior_alignment_head": [
2,
null,
null
],
"prior_alignment_layer": [
68,
null,
null
],
"prior_attn_dropout": 0,
"prior_attn_order": [
10,
2,
2
],
"prior_blocks": 128,
"prior_depth": [
79,
72,
72
],
"prior_emb_dropout": 0,
"prior_init_scale": [
0.2,
1,
1
],
"prior_latent_dim": 2048,
"prior_m_attn": 0.25,
"prior_n_ctx": [
8192,
8192,
8192
],
"prior_n_heads": [
8,
1,
1
],
"prior_pos_init": false,
"prior_res_scale": false,
"prior_resid_dropout": 0,
"prior_spread": null,
"prior_width": [
4800,
1920,
1920
],
"prior_zero_out": false,
"sample_length": 1058304,
"sampling_rate": 44100,
"single_enc_dec": [
false,
false,
false
],
"timing_dims": 128,
"torch_dtype": "float32",
"transformers_version": "4.22.0.dev0",
"vqvae_codebook_dimension": 2048,
"vqvae_commit": 0.02,
"vqvae_conv_block_depth": 4,
"vqvae_conv_block_width": 32,
"vqvae_depth": 4,
"vqvae_dilation_cycle": null,
"vqvae_dilation_growth_rate": 3,
"vqvae_downs_t": [
3,
2,
2
],
"vqvae_emmbedding_width": 64,
"vqvae_levels": 3,
"vqvae_lmu": 0.99,
"vqvae_m_conv": 1,
"vqvae_multipliers": [
2,
1,
1
],
"vqvae_music_tokens_shapes": [
[
8268
],
[
33072
],
[
132288
]
],
"vqvae_reverse_decoder_dilation": 1,
"vqvae_strides_t": [
2,
2,
2
],
"vqvae_width": 64
}