ArthurZ HF staff committed on
Commit
62e97d2
1 Parent(s): a66e290
Files changed (1) hide show
  1. config.json +85 -118
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "/home/arthur_huggingface_co/transformers/jukebox-5b-lyrics-converted",
4
  "architectures": [
5
  "JukeboxModel"
6
  ],
@@ -26,32 +26,69 @@
26
  "attn_res_scale": false,
27
  "blocks": 64,
28
  "conv_res_scale": null,
29
- "depth": 79,
30
  "emb_dropout": 0,
31
- "embed_dim": 2048,
32
- "encoder_attention_multiplier": 0.25,
33
- "encoder_attention_pattern": "RawColumnPreviousRowAttention",
34
- "encoder_attn_dropout": 0.0,
35
- "encoder_attn_res_scale": false,
36
- "encoder_blocks": 32,
37
- "encoder_depth": 18,
38
- "encoder_emb_dropout": 0.0,
39
- "encoder_heads": 4,
40
- "encoder_init_scale": 0.1,
41
- "encoder_loss_fraction": [
42
- 0.4,
43
- 0.0,
44
- 0.0
45
- ],
46
- "encoder_mlp_multiplier": 1.0,
47
- "encoder_n_vocab": 80,
48
- "encoder_resid_dropout": 0.0,
49
- "encoder_spread": null,
50
- "encoder_width": 1280,
51
- "encoder_zero_out": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "init_scale": 0.2,
53
  "is_encoder_decoder": false,
54
- "lyric_conditioning": true,
 
55
  "mask": true,
56
  "max_duration": 600.0,
57
  "max_nb_genres": 5,
@@ -63,10 +100,12 @@
63
  ],
64
  "min_duration": 23.8,
65
  "mlp_multiplier": 1.0,
66
- "model_type": "jukebox",
 
67
  "n_ctx": 8192,
68
  "n_heads": 8,
69
  "nb_relevant_lyric_tokens": 512,
 
70
  "res_conv_depth": null,
71
  "res_conv_width": null,
72
  "res_convolution_multiplier": null,
@@ -86,8 +125,6 @@
86
  "sampling_rate": 44100,
87
  "spread": null,
88
  "timing_dims": 128,
89
- "transformers_version": "4.25.0.dev0",
90
- "width": 4800,
91
  "zero_out": false
92
  },
93
  "prior_1": {
@@ -100,32 +137,15 @@
100
  "attn_res_scale": false,
101
  "blocks": 64,
102
  "conv_res_scale": true,
103
- "depth": 72,
104
  "emb_dropout": 0,
105
  "embed_dim": 2048,
106
- "encoder_attention_multiplier": null,
107
- "encoder_attention_pattern": null,
108
- "encoder_attn_dropout": null,
109
- "encoder_attn_res_scale": false,
110
- "encoder_blocks": null,
111
- "encoder_depth": null,
112
- "encoder_emb_dropout": null,
113
- "encoder_heads": null,
114
- "encoder_init_scale": null,
115
- "encoder_loss_fraction": [
116
- 0.4,
117
- 0.0,
118
- 0.0
119
- ],
120
- "encoder_mlp_multiplier": null,
121
- "encoder_n_vocab": 79,
122
- "encoder_resid_dropout": null,
123
- "encoder_spread": null,
124
- "encoder_width": null,
125
- "encoder_zero_out": null,
126
  "init_scale": 1,
127
  "is_encoder_decoder": false,
128
- "lyric_conditioning": true,
 
129
  "mask": true,
130
  "max_duration": 600.0,
131
  "max_nb_genres": 5,
@@ -137,10 +157,12 @@
137
  ],
138
  "min_duration": 23.8,
139
  "mlp_multiplier": 1.0,
140
- "model_type": "jukebox",
 
141
  "n_ctx": 8192,
142
  "n_heads": 1,
143
  "nb_relevant_lyric_tokens": 0,
 
144
  "res_conv_depth": 16,
145
  "res_conv_width": 1024,
146
  "res_convolution_multiplier": 1,
@@ -160,8 +182,6 @@
160
  "sampling_rate": 44100,
161
  "spread": null,
162
  "timing_dims": 128,
163
- "transformers_version": "4.25.0.dev0",
164
- "width": 1920,
165
  "zero_out": false
166
  },
167
  "prior_2": {
@@ -174,32 +194,15 @@
174
  "attn_res_scale": false,
175
  "blocks": 64,
176
  "conv_res_scale": false,
177
- "depth": 72,
178
  "emb_dropout": 0,
179
  "embed_dim": 2048,
180
- "encoder_attention_multiplier": null,
181
- "encoder_attention_pattern": null,
182
- "encoder_attn_dropout": null,
183
- "encoder_attn_res_scale": false,
184
- "encoder_blocks": null,
185
- "encoder_depth": null,
186
- "encoder_emb_dropout": null,
187
- "encoder_heads": null,
188
- "encoder_init_scale": null,
189
- "encoder_loss_fraction": [
190
- 0.4,
191
- 0.0,
192
- 0.0
193
- ],
194
- "encoder_mlp_multiplier": null,
195
- "encoder_n_vocab": 79,
196
- "encoder_resid_dropout": null,
197
- "encoder_spread": null,
198
- "encoder_width": null,
199
- "encoder_zero_out": null,
200
  "init_scale": 1,
201
  "is_encoder_decoder": false,
202
- "lyric_conditioning": false,
 
203
  "mask": true,
204
  "max_duration": 600.0,
205
  "max_nb_genres": 5,
@@ -211,10 +214,12 @@
211
  ],
212
  "min_duration": 23.8,
213
  "mlp_multiplier": 1.0,
214
- "model_type": "jukebox",
 
215
  "n_ctx": 8192,
216
  "n_heads": 1,
217
  "nb_relevant_lyric_tokens": 0,
 
218
  "res_conv_depth": 16,
219
  "res_conv_width": 1024,
220
  "res_convolution_multiplier": 1,
@@ -234,8 +239,6 @@
234
  "sampling_rate": 44100,
235
  "spread": null,
236
  "timing_dims": 128,
237
- "transformers_version": "4.25.0.dev0",
238
- "width": 1920,
239
  "zero_out": false
240
  },
241
  "sampling_rate": 44100,
@@ -244,45 +247,6 @@
244
  "transformers_version": null,
245
  "vqvae_config": {
246
  "act_fn": "relu",
247
- "codebook_dimension": 2048,
248
- "commit": 0.02,
249
- "conv_input_shape": 1,
250
- "conv_res_scale": false,
251
- "embed_dim": 64,
252
- "hop_fraction": [
253
- 0.125,
254
- 0.5,
255
- 0.5
256
- ],
257
- "levels": 3,
258
- "lmu": 0.99,
259
- "model_type": "",
260
- "multipliers": [
261
- 2,
262
- 1,
263
- 1
264
- ],
265
- "res_conv_depth": 4,
266
- "res_conv_width": 32,
267
- "res_convolution_multiplier": 1,
268
- "res_dilation_cycle": null,
269
- "res_dilation_growth_rate": 3,
270
- "res_downs_t": [
271
- 3,
272
- 2,
273
- 2
274
- ],
275
- "res_strides_t": [
276
- 2,
277
- 2,
278
- 2
279
- ],
280
- "sample_length": 1058304,
281
- "transformers_version": "4.25.0.dev0"
282
- },
283
- "vqvae_config_dict": {
284
- "act_fn": "relu",
285
- "codebook_dimension": 2048,
286
  "commit": 0.02,
287
  "conv_input_shape": 1,
288
  "conv_res_scale": false,
@@ -292,14 +256,16 @@
292
  0.5,
293
  0.5
294
  ],
 
295
  "levels": 3,
296
  "lmu": 0.99,
297
- "model_type": "",
298
  "multipliers": [
299
  2,
300
  1,
301
  1
302
  ],
 
303
  "res_conv_depth": 4,
304
  "res_conv_width": 32,
305
  "res_convolution_multiplier": 1,
@@ -316,6 +282,7 @@
316
  2
317
  ],
318
  "sample_length": 1058304,
319
- "transformers_version": "4.24.0.dev0"
 
320
  }
321
  }
 
1
  {
2
  "_commit_hash": null,
3
+ "_name_or_path": "jukebox-5b-lyrics",
4
  "architectures": [
5
  "JukeboxModel"
6
  ],
 
26
  "attn_res_scale": false,
27
  "blocks": 64,
28
  "conv_res_scale": null,
 
29
  "emb_dropout": 0,
30
+ "encoder_config": {
31
+ "act_fn": "quick_gelu",
32
+ "alignment_head": 2,
33
+ "alignment_layer": 68,
34
+ "attention_multiplier": 0.25,
35
+ "attention_pattern": "RawColumnPreviousRowAttention",
36
+ "attn_dropout": 0.0,
37
+ "attn_res_scale": false,
38
+ "blocks": 32,
39
+ "conv_res_scale": null,
40
+ "emb_dropout": 0.0,
41
+ "encoder_config": null,
42
+ "encoder_loss_fraction": 0.4,
43
+ "hidden_size": 1280,
44
+ "init_scale": 0.1,
45
+ "is_encoder_decoder": false,
46
+ "level": 0,
47
+ "lyric_vocab_size": 80,
48
+ "mask": true,
49
+ "max_duration": 600,
50
+ "max_nb_genres": 1,
51
+ "merged_decoder": false,
52
+ "metadata_conditioning": true,
53
+ "metadata_dims": [
54
+ 604,
55
+ 7898
56
+ ],
57
+ "min_duration": 0,
58
+ "mlp_multiplier": 1.0,
59
+ "model_type": "jukebox_prior",
60
+ "music_vocab_size": 2048,
61
+ "n_ctx": 6144,
62
+ "n_heads": 4,
63
+ "nb_relevant_lyric_tokens": 384,
64
+ "num_layers": 18,
65
+ "res_conv_depth": 3,
66
+ "res_conv_width": 128,
67
+ "res_convolution_multiplier": 1,
68
+ "res_dilation_cycle": null,
69
+ "res_dilation_growth_rate": 1,
70
+ "res_downs_t": [
71
+ 3,
72
+ 2,
73
+ 2
74
+ ],
75
+ "res_strides_t": [
76
+ 2,
77
+ 2,
78
+ 2
79
+ ],
80
+ "resid_dropout": 0.0,
81
+ "sampling_rate": 44100,
82
+ "spread": null,
83
+ "timing_dims": 64,
84
+ "zero_out": false
85
+ },
86
+ "encoder_loss_fraction": 0.4,
87
+ "hidden_size": 4800,
88
  "init_scale": 0.2,
89
  "is_encoder_decoder": false,
90
+ "level": 0,
91
+ "lyric_vocab_size": 80,
92
  "mask": true,
93
  "max_duration": 600.0,
94
  "max_nb_genres": 5,
 
100
  ],
101
  "min_duration": 23.8,
102
  "mlp_multiplier": 1.0,
103
+ "model_type": "jukebox_prior",
104
+ "music_vocab_size": 2048,
105
  "n_ctx": 8192,
106
  "n_heads": 8,
107
  "nb_relevant_lyric_tokens": 512,
108
+ "num_layers": 79,
109
  "res_conv_depth": null,
110
  "res_conv_width": null,
111
  "res_convolution_multiplier": null,
 
125
  "sampling_rate": 44100,
126
  "spread": null,
127
  "timing_dims": 128,
 
 
128
  "zero_out": false
129
  },
130
  "prior_1": {
 
137
  "attn_res_scale": false,
138
  "blocks": 64,
139
  "conv_res_scale": true,
 
140
  "emb_dropout": 0,
141
  "embed_dim": 2048,
142
+ "encoder_config": null,
143
+ "encoder_loss_fraction": 0.4,
144
+ "hidden_size": 1920,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  "init_scale": 1,
146
  "is_encoder_decoder": false,
147
+ "level": 1,
148
+ "lyric_vocab_size": 80,
149
  "mask": true,
150
  "max_duration": 600.0,
151
  "max_nb_genres": 5,
 
157
  ],
158
  "min_duration": 23.8,
159
  "mlp_multiplier": 1.0,
160
+ "model_type": "jukebox_prior",
161
+ "music_vocab_size": 2048,
162
  "n_ctx": 8192,
163
  "n_heads": 1,
164
  "nb_relevant_lyric_tokens": 0,
165
+ "num_layers": 72,
166
  "res_conv_depth": 16,
167
  "res_conv_width": 1024,
168
  "res_convolution_multiplier": 1,
 
182
  "sampling_rate": 44100,
183
  "spread": null,
184
  "timing_dims": 128,
 
 
185
  "zero_out": false
186
  },
187
  "prior_2": {
 
194
  "attn_res_scale": false,
195
  "blocks": 64,
196
  "conv_res_scale": false,
 
197
  "emb_dropout": 0,
198
  "embed_dim": 2048,
199
+ "encoder_config": null,
200
+ "encoder_loss_fraction": 0.4,
201
+ "hidden_size": 1920,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  "init_scale": 1,
203
  "is_encoder_decoder": false,
204
+ "level": 2,
205
+ "lyric_vocab_size": 80,
206
  "mask": true,
207
  "max_duration": 600.0,
208
  "max_nb_genres": 5,
 
214
  ],
215
  "min_duration": 23.8,
216
  "mlp_multiplier": 1.0,
217
+ "model_type": "jukebox_prior",
218
+ "music_vocab_size": 2048,
219
  "n_ctx": 8192,
220
  "n_heads": 1,
221
  "nb_relevant_lyric_tokens": 0,
222
+ "num_layers": 72,
223
  "res_conv_depth": 16,
224
  "res_conv_width": 1024,
225
  "res_convolution_multiplier": 1,
 
239
  "sampling_rate": 44100,
240
  "spread": null,
241
  "timing_dims": 128,
 
 
242
  "zero_out": false
243
  },
244
  "sampling_rate": 44100,
 
247
  "transformers_version": null,
248
  "vqvae_config": {
249
  "act_fn": "relu",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  "commit": 0.02,
251
  "conv_input_shape": 1,
252
  "conv_res_scale": false,
 
256
  0.5,
257
  0.5
258
  ],
259
+ "init_scale": 0.2,
260
  "levels": 3,
261
  "lmu": 0.99,
262
+ "model_type": "jukebox_vqvae",
263
  "multipliers": [
264
  2,
265
  1,
266
  1
267
  ],
268
+ "nb_discrete_codes": 2048,
269
  "res_conv_depth": 4,
270
  "res_conv_width": 32,
271
  "res_convolution_multiplier": 1,
 
282
  2
283
  ],
284
  "sample_length": 1058304,
285
+ "transformers_version": "4.25.0.dev0",
286
+ "zero_out": false
287
  }
288
  }