{
  "_commit_hash": null,
  "_name_or_path": "/home/arthur_huggingface_co/transformers/jukebox-5b-lyrics-converted",
  "architectures": [
    "JukeboxModel"
  ],
  "hop_fraction": [
    0.125,
    0.5,
    0.5
  ],
  "init_std": 0.2,
  "max_duration": 600.0,
  "max_nb_genres": 5,
  "metadata_conditioning": true,
  "min_duration": 23.8,
  "model_type": "jukebox",
  "nb_priors": 3,
|
  "prior_0": {
    "act_fn": "quick_gelu",
    "alignment_head": 2,
    "alignment_layer": 68,
    "attention_multiplier": 0.25,
    "attention_pattern": "large_separated_enc_dec_w_lyrics",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": null,
    "depth": 79,
    "emb_dropout": 0,
    "embed_dim": 2048,
    "encoder_attention_multiplier": 0.25,
    "encoder_attention_pattern": "RawColumnPreviousRowAttention",
    "encoder_attn_dropout": 0.0,
    "encoder_attn_res_scale": false,
    "encoder_blocks": 32,
    "encoder_depth": 18,
    "encoder_emb_dropout": 0.0,
    "encoder_heads": 4,
    "encoder_init_scale": 0.1,
    "encoder_loss_fraction": [
      0.4,
      0.0,
      0.0
    ],
    "encoder_mlp_multiplier": 1.0,
    "encoder_n_vocab": 80,
    "encoder_resid_dropout": 0.0,
    "encoder_spread": null,
    "encoder_width": 1280,
    "encoder_zero_out": false,
    "init_scale": 0.2,
    "is_encoder_decoder": false,
    "lyric_conditioning": true,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": true,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox",
    "n_ctx": 8192,
    "n_heads": 8,
    "nb_relevant_lyric_tokens": 512,
    "res_conv_depth": null,
    "res_conv_width": null,
    "res_convolution_multiplier": null,
    "res_dilation_cycle": null,
    "res_dilation_growth_rate": null,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "transformers_version": "4.25.0.dev0",
    "width": 4800,
    "zero_out": false
  },
|
  "prior_1": {
    "act_fn": "quick_gelu",
    "alignment_head": null,
    "alignment_layer": null,
    "attention_multiplier": 0.25,
    "attention_pattern": "RawColumnPreviousRowAttention",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": true,
    "depth": 72,
    "emb_dropout": 0,
    "embed_dim": 2048,
    "encoder_attention_multiplier": null,
    "encoder_attention_pattern": null,
    "encoder_attn_dropout": null,
    "encoder_attn_res_scale": false,
    "encoder_blocks": null,
    "encoder_depth": null,
    "encoder_emb_dropout": null,
    "encoder_heads": null,
    "encoder_init_scale": null,
    "encoder_loss_fraction": [
      0.4,
      0.0,
      0.0
    ],
    "encoder_mlp_multiplier": null,
    "encoder_n_vocab": 79,
    "encoder_resid_dropout": null,
    "encoder_spread": null,
    "encoder_width": null,
    "encoder_zero_out": null,
    "init_scale": 1,
    "is_encoder_decoder": false,
    "lyric_conditioning": true,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": false,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox",
    "n_ctx": 8192,
    "n_heads": 1,
    "nb_relevant_lyric_tokens": 0,
    "res_conv_depth": 16,
    "res_conv_width": 1024,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": 8,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "transformers_version": "4.25.0.dev0",
    "width": 1920,
    "zero_out": false
  },
|
  "prior_2": {
    "act_fn": "quick_gelu",
    "alignment_head": null,
    "alignment_layer": null,
    "attention_multiplier": 0.25,
    "attention_pattern": "RawColumnPreviousRowAttention",
    "attn_dropout": 0,
    "attn_res_scale": false,
    "blocks": 64,
    "conv_res_scale": false,
    "depth": 72,
    "emb_dropout": 0,
    "embed_dim": 2048,
    "encoder_attention_multiplier": null,
    "encoder_attention_pattern": null,
    "encoder_attn_dropout": null,
    "encoder_attn_res_scale": false,
    "encoder_blocks": null,
    "encoder_depth": null,
    "encoder_emb_dropout": null,
    "encoder_heads": null,
    "encoder_init_scale": null,
    "encoder_loss_fraction": [
      0.4,
      0.0,
      0.0
    ],
    "encoder_mlp_multiplier": null,
    "encoder_n_vocab": 79,
    "encoder_resid_dropout": null,
    "encoder_spread": null,
    "encoder_width": null,
    "encoder_zero_out": null,
    "init_scale": 1,
    "is_encoder_decoder": false,
    "lyric_conditioning": false,
    "mask": true,
    "max_duration": 600.0,
    "max_nb_genres": 5,
    "merged_decoder": false,
    "metadata_conditioning": true,
    "metadata_dims": [
      120,
      4111
    ],
    "min_duration": 23.8,
    "mlp_multiplier": 1.0,
    "model_type": "jukebox",
    "n_ctx": 8192,
    "n_heads": 1,
    "nb_relevant_lyric_tokens": 0,
    "res_conv_depth": 16,
    "res_conv_width": 1024,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": 8,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "resid_dropout": 0,
    "sampling_rate": 44100,
    "spread": null,
    "timing_dims": 128,
    "transformers_version": "4.25.0.dev0",
    "width": 1920,
    "zero_out": false
  },
|
  "sampling_rate": 44100,
  "timing_dims": 128,
  "torch_dtype": "float32",
  "transformers_version": null,
|
  "vqvae_config": {
    "act_fn": "relu",
    "codebook_dimension": 2048,
    "commit": 0.02,
    "conv_input_shape": 1,
    "conv_res_scale": false,
    "embed_dim": 64,
    "hop_fraction": [
      0.125,
      0.5,
      0.5
    ],
    "levels": 3,
    "lmu": 0.99,
    "model_type": "",
    "multipliers": [
      2,
      1,
      1
    ],
    "res_conv_depth": 4,
    "res_conv_width": 32,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": null,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "sample_length": 1058304,
    "transformers_version": "4.25.0.dev0"
  },
|
  "vqvae_config_dict": {
    "act_fn": "relu",
    "codebook_dimension": 2048,
    "commit": 0.02,
    "conv_input_shape": 1,
    "conv_res_scale": false,
    "embed_dim": 64,
    "hop_fraction": [
      0.125,
      0.5,
      0.5
    ],
    "levels": 3,
    "lmu": 0.99,
    "model_type": "",
    "multipliers": [
      2,
      1,
      1
    ],
    "res_conv_depth": 4,
    "res_conv_width": 32,
    "res_convolution_multiplier": 1,
    "res_dilation_cycle": null,
    "res_dilation_growth_rate": 3,
    "res_downs_t": [
      3,
      2,
      2
    ],
    "res_strides_t": [
      2,
      2,
      2
    ],
    "sample_length": 1058304,
    "transformers_version": "4.24.0.dev0"
  }
}
|
|