{
  "_name_or_path": "jukebox-5b-lyrics",
  "architectures": [
    "JukeboxModel"
  ],
  "cond_c_res": [
    0,
    1,
    1
  ],
  "cond_depth": [
    3,
    16,
    16
  ],
  "cond_dilation_cycle": [
    null,
    8,
    8
  ],
  "cond_dilation_growth_rate": [
    1,
    3,
    3
  ],
  "cond_downs_t": [
    3,
    2,
    2
  ],
  "cond_m_conv": 1,
  "cond_res_scale": [
    null,
    true,
    false
  ],
  "cond_strides_t": [
    2,
    2,
    2
  ],
  "cond_width": [
    128,
    1024,
    1024
  ],
  "cond_zero_out": false,
  "copy_input": false,
  "fp16_params": true,
  "hop_fraction": [
    0.125,
    0.5,
    0.5
  ],
  "init_std": 0.2,
  "lyric_conditioning": [
    true,
    false,
    false
  ],
  "max_duration": 600.0,
  "max_nb_genres": 5,
  "merged_decoder": [
    true,
    false,
    false
  ],
  "metadata_conditioning": true,
  "metadata_dims": [
    [
      120,
      4111
    ],
    [
      120,
      4111
    ],
    [
      120,
      4111
    ]
  ],
  "min_duration": 23.8,
  "model_type": "jukebox",
  "nb_priors": 3,
  "nb_relevant_lyric_tokens": [
    512,
    0,
    0
  ],
  "lyric_enc_attn_dropout": 0.0,
  "lyric_enc_attn_order": [
    2,
    0,
    0
  ],
  "lyric_enc_blocks": 32,
  "lyric_enc_depth": [
    18,
    3,
    3
  ],
  "lyric_enc_emb_dropout": 0.0,
  "lyric_enc_heads": 4,
  "lyric_enc_init_scale": [
    0.1,
    0.4,
    0.4
  ],
  "lyric_enc_loss_fraction": [
    0.4,
    0.0,
    0.0
  ],
  "lyric_enc_m_attn": 0.25,
  "lyric_enc_m_mlp": 1.0,
  "lyric_enc_n_vocab": 80,
  "lyric_enc_pos_init": false,
  "lyric_enc_res_scale": false,
  "lyric_enc_resid_dropout": 0.0,
  "lyric_enc_spread": null,
  "lyric_enc_width": [
    1280,
    128,
    128
  ],
  "lyric_enc_zero_out": false,
  "prior_alignment_head": [
    2,
    null,
    null
  ],
  "prior_alignment_layer": [
    68,
    null,
    null
  ],
  "prior_attn_dropout": 0,
  "prior_attn_order": [
    10,
    2,
    2
  ],
  "prior_blocks": 128,
  "prior_depth": [
    79,
    72,
    72
  ],
  "prior_emb_dropout": 0,
  "prior_init_scale": [
    0.2,
    1,
    1
  ],
  "prior_latent_dim": 2048,
  "prior_m_attn": 0.25,
  "prior_n_ctx": [
    8192,
    8192,
    8192
  ],
  "prior_n_heads": [
    8,
    1,
    1
  ],
  "prior_pos_init": false,
  "prior_res_scale": false,
  "prior_resid_dropout": 0,
  "prior_spread": null,
  "prior_width": [
    4800,
    1920,
    1920
  ],
  "prior_zero_out": false,
  "sample_length": 1058304,
  "sampling_rate": 44100,
  "single_enc_dec": [
    false,
    false,
    false
  ],
  "timing_dims": 128,
  "torch_dtype": "float32",
  "transformers_version": "4.22.0.dev0",
  "vqvae_codebook_dimension": 2048,
  "vqvae_commit": 0.02,
  "vqvae_conv_block_depth": 4,
  "vqvae_conv_block_width": 32,
  "vqvae_depth": 4,
  "vqvae_dilation_cycle": null,
  "vqvae_dilation_growth_rate": 3,
  "vqvae_downs_t": [
    3,
    2,
    2
  ],
  "vqvae_emmbedding_width": 64,
  "vqvae_levels": 3,
  "vqvae_lmu": 0.99,
  "vqvae_m_conv": 1,
  "vqvae_multipliers": [
    2,
    1,
    1
  ],
  "vqvae_music_tokens_shapes": [
    [
      8268
    ],
    [
      33072
    ],
    [
      132288
    ]
  ],
  "vqvae_reverse_decoder_dilation": 1,
  "vqvae_strides_t": [
    2,
    2,
    2
  ],
  "vqvae_width": 64
}