jukebox-5b-lyrics / config.json
ArthurZ's picture
ArthurZ HF staff
Upload model
f874908
raw
history blame
7.24 kB
{
"_commit_hash": null,
"_name_or_path": "/home/arthur_huggingface_co/transformers/jukebox-5b-lyrics-converted",
"architectures": [
"JukeboxModel"
],
"hop_fraction": [
0.125,
0.5,
0.5
],
"init_std": 0.2,
"max_duration": 600.0,
"max_nb_genres": 5,
"metadata_conditioning": true,
"min_duration": 23.8,
"model_type": "jukebox",
"nb_priors": 3,
"prior_0": {
"act_fn": "quick_gelu",
"alignment_head": 2,
"alignment_layer": 68,
"attention_multiplier": 0.25,
"attention_pattern": "large_separated_enc_dec_w_lyrics",
"attn_dropout": 0,
"attn_res_scale": false,
"blocks": 64,
"conv_res_scale": null,
"depth": 79,
"emb_dropout": 0,
"embed_dim": 2048,
"encoder_attention_multiplier": 0.25,
"encoder_attention_pattern": "RawColumnPreviousRowAttention",
"encoder_attn_dropout": 0.0,
"encoder_attn_res_scale": false,
"encoder_blocks": 32,
"encoder_depth": 18,
"encoder_emb_dropout": 0.0,
"encoder_heads": 4,
"encoder_init_scale": 0.1,
"encoder_loss_fraction": [
0.4,
0.0,
0.0
],
"encoder_mlp_multiplier": 1.0,
"encoder_n_vocab": 80,
"encoder_resid_dropout": 0.0,
"encoder_spread": null,
"encoder_width": 1280,
"encoder_zero_out": false,
"init_scale": 0.2,
"is_encoder_decoder": false,
"lyric_conditioning": true,
"mask": true,
"max_duration": 600.0,
"max_nb_genres": 5,
"merged_decoder": true,
"metadata_conditioning": true,
"metadata_dims": [
120,
4111
],
"min_duration": 23.8,
"mlp_multiplier": 1.0,
"model_type": "jukebox",
"n_ctx": 8192,
"n_heads": 8,
"nb_relevant_lyric_tokens": 512,
"res_conv_depth": null,
"res_conv_width": null,
"res_convolution_multiplier": null,
"res_dilation_cycle": null,
"res_dilation_growth_rate": null,
"res_downs_t": [
3,
2,
2
],
"res_strides_t": [
2,
2,
2
],
"resid_dropout": 0,
"sampling_rate": 44100,
"spread": null,
"timing_dims": 128,
"transformers_version": "4.25.0.dev0",
"width": 4800,
"zero_out": false
},
"prior_1": {
"act_fn": "quick_gelu",
"alignment_head": null,
"alignment_layer": null,
"attention_multiplier": 0.25,
"attention_pattern": "RawColumnPreviousRowAttention",
"attn_dropout": 0,
"attn_res_scale": false,
"blocks": 64,
"conv_res_scale": true,
"depth": 72,
"emb_dropout": 0,
"embed_dim": 2048,
"encoder_attention_multiplier": null,
"encoder_attention_pattern": null,
"encoder_attn_dropout": null,
"encoder_attn_res_scale": false,
"encoder_blocks": null,
"encoder_depth": null,
"encoder_emb_dropout": null,
"encoder_heads": null,
"encoder_init_scale": null,
"encoder_loss_fraction": [
0.4,
0.0,
0.0
],
"encoder_mlp_multiplier": null,
"encoder_n_vocab": 79,
"encoder_resid_dropout": null,
"encoder_spread": null,
"encoder_width": null,
"encoder_zero_out": null,
"init_scale": 1,
"is_encoder_decoder": false,
"lyric_conditioning": true,
"mask": true,
"max_duration": 600.0,
"max_nb_genres": 5,
"merged_decoder": false,
"metadata_conditioning": true,
"metadata_dims": [
120,
4111
],
"min_duration": 23.8,
"mlp_multiplier": 1.0,
"model_type": "jukebox",
"n_ctx": 8192,
"n_heads": 1,
"nb_relevant_lyric_tokens": 0,
"res_conv_depth": 16,
"res_conv_width": 1024,
"res_convolution_multiplier": 1,
"res_dilation_cycle": 8,
"res_dilation_growth_rate": 3,
"res_downs_t": [
3,
2,
2
],
"res_strides_t": [
2,
2,
2
],
"resid_dropout": 0,
"sampling_rate": 44100,
"spread": null,
"timing_dims": 128,
"transformers_version": "4.25.0.dev0",
"width": 1920,
"zero_out": false
},
"prior_2": {
"act_fn": "quick_gelu",
"alignment_head": null,
"alignment_layer": null,
"attention_multiplier": 0.25,
"attention_pattern": "RawColumnPreviousRowAttention",
"attn_dropout": 0,
"attn_res_scale": false,
"blocks": 64,
"conv_res_scale": false,
"depth": 72,
"emb_dropout": 0,
"embed_dim": 2048,
"encoder_attention_multiplier": null,
"encoder_attention_pattern": null,
"encoder_attn_dropout": null,
"encoder_attn_res_scale": false,
"encoder_blocks": null,
"encoder_depth": null,
"encoder_emb_dropout": null,
"encoder_heads": null,
"encoder_init_scale": null,
"encoder_loss_fraction": [
0.4,
0.0,
0.0
],
"encoder_mlp_multiplier": null,
"encoder_n_vocab": 79,
"encoder_resid_dropout": null,
"encoder_spread": null,
"encoder_width": null,
"encoder_zero_out": null,
"init_scale": 1,
"is_encoder_decoder": false,
"lyric_conditioning": false,
"mask": true,
"max_duration": 600.0,
"max_nb_genres": 5,
"merged_decoder": false,
"metadata_conditioning": true,
"metadata_dims": [
120,
4111
],
"min_duration": 23.8,
"mlp_multiplier": 1.0,
"model_type": "jukebox",
"n_ctx": 8192,
"n_heads": 1,
"nb_relevant_lyric_tokens": 0,
"res_conv_depth": 16,
"res_conv_width": 1024,
"res_convolution_multiplier": 1,
"res_dilation_cycle": 8,
"res_dilation_growth_rate": 3,
"res_downs_t": [
3,
2,
2
],
"res_strides_t": [
2,
2,
2
],
"resid_dropout": 0,
"sampling_rate": 44100,
"spread": null,
"timing_dims": 128,
"transformers_version": "4.25.0.dev0",
"width": 1920,
"zero_out": false
},
"sampling_rate": 44100,
"timing_dims": 128,
"torch_dtype": "float32",
"transformers_version": null,
"vqvae_config": {
"act_fn": "relu",
"codebook_dimension": 2048,
"commit": 0.02,
"conv_input_shape": 1,
"conv_res_scale": false,
"embed_dim": 64,
"hop_fraction": [
0.125,
0.5,
0.5
],
"levels": 3,
"lmu": 0.99,
"model_type": "",
"multipliers": [
2,
1,
1
],
"res_conv_depth": 4,
"res_conv_width": 32,
"res_convolution_multiplier": 1,
"res_dilation_cycle": null,
"res_dilation_growth_rate": 3,
"res_downs_t": [
3,
2,
2
],
"res_strides_t": [
2,
2,
2
],
"sample_length": 1058304,
"transformers_version": "4.25.0.dev0"
},
"vqvae_config_dict": {
"act_fn": "relu",
"codebook_dimension": 2048,
"commit": 0.02,
"conv_input_shape": 1,
"conv_res_scale": false,
"embed_dim": 64,
"hop_fraction": [
0.125,
0.5,
0.5
],
"levels": 3,
"lmu": 0.99,
"model_type": "",
"multipliers": [
2,
1,
1
],
"res_conv_depth": 4,
"res_conv_width": 32,
"res_convolution_multiplier": 1,
"res_dilation_cycle": null,
"res_dilation_growth_rate": 3,
"res_downs_t": [
3,
2,
2
],
"res_strides_t": [
2,
2,
2
],
"sample_length": 1058304,
"transformers_version": "4.24.0.dev0"
}
}