TulipAIs committed on
Commit
205d938
1 Parent(s): 3add6cd

moving the audiogen tab before musicgen

Browse files
Files changed (1) hide show
  1. app.py +201 -194
app.py CHANGED
@@ -188,6 +188,7 @@ def get_audio_info(audio_path):
188
  return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted. (Discord removes metadata from mp4 and wav files, so you can't use them)"
189
  json_string = song.tags['COMMENT'][0]
190
  data = json.loads(json_string)
 
191
  global_prompt = str("\nGlobal Prompt: " + (data['global_prompt'] if data['global_prompt'] != "" else "none")) if 'global_prompt' in data else ""
192
  bpm = str("\nBPM: " + data['bpm']) if 'bpm' in data else ""
193
  key = str("\nKey: " + data['key']) if 'key' in data else ""
@@ -212,13 +213,15 @@ def get_audio_info(audio_path):
212
  version = str("Version: " + data['version']) if 'version' in data else "Version: Unknown"
213
  info = str(version + global_prompt + bpm + key + scale + prompts + duration + overlap + seed + audio_mode + input_length + channel + sr_select + model + custom_model + base_model + decoder + topk + topp + temperature + cfg_coef)
214
  if info == "":
215
- return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted. (Discord removes metadata from mp4 and wav files, so you can't use them)"
 
216
  return info
217
  else:
218
  with open(audio_path.name) as json_file:
219
  data = json.load(json_file)
220
  #if 'global_prompt' not in data:
221
- #return "No tags found. Either the file is not generated by MusicGen+ V1.2.8a and higher or the tags are corrupted."
 
222
  global_prompt = str("\nGlobal Prompt: " + (data['global_prompt'] if data['global_prompt'] != "" else "none")) if 'global_prompt' in data else ""
223
  bpm = str("\nBPM: " + data['bpm']) if 'bpm' in data else ""
224
  key = str("\nKey: " + data['key']) if 'key' in data else ""
@@ -243,7 +246,8 @@ def get_audio_info(audio_path):
243
  version = str("Version: " + data['version']) if 'version' in data else "Version: Unknown"
244
  info = str(version + global_prompt + bpm + key + scale + prompts + duration + overlap + seed + audio_mode + input_length + channel + sr_select + model + custom_model + base_model + decoder + topk + topp + temperature + cfg_coef)
245
  if info == "":
246
- return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted."
 
247
  return info
248
  else:
249
  return "Only .wav ,.mp4 and .json files are supported"
@@ -467,7 +471,8 @@ def load_diffusion():
467
  global MBD
468
  if MBD is None:
469
  print("loading MBD")
470
- MBD = MultiBandDiffusion.get_mbd_musicgen()
 
471
 
472
 
473
  def unload_diffusion():
@@ -852,12 +857,14 @@ def predict_full(gen_type, model, decoder, custom_model, base_model, prompt_amou
852
 
853
  if gen_type == "music":
854
  model_shrt = model
855
- model = "GrandaddyShmax/musicgen-" + model
 
856
  elif gen_type == "audio":
857
  model_shrt = model
858
  model = "GrandaddyShmax/audiogen-" + model
859
  base_model_shrt = base_model
860
- base_model = "GrandaddyShmax/musicgen-" + base_model
 
861
 
862
  if MODEL is None or MODEL.name != (model):
863
  load_model(model, custom_model, base_model, gen_type)
@@ -957,118 +964,106 @@ def ui_full(launch_kwargs):
957
  Welcome to Soundscapes - TulipAI’s flagship Audio Storytelling Toolkit. Designed with modern content creators in mind, our AI-driven platform generates audio sound effects in just minutes tailored to your unique needs.
958
  """
959
  )
960
- with gr.Tab("MusicGen"):
961
  gr.Markdown(
962
  """
963
- ### MusicGen
964
  """
965
  )
966
  with gr.Row():
967
  with gr.Column():
968
  with gr.Tab("Generation"):
969
  with gr.Accordion("Structure Prompts", open=False):
970
- with gr.Column():
971
- with gr.Row():
972
- struc_prompts = gr.Checkbox(label="Enable", value=False, interactive=True, container=False)
973
- bpm = gr.Number(label="BPM", value=120, interactive=True, scale=1, precision=0)
974
- key = gr.Dropdown(["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "Bb", "B"], label="Key", value="C", interactive=True)
975
- scale = gr.Dropdown(["Major", "Minor"], label="Scale", value="Major", interactive=True)
976
- with gr.Row():
977
- global_prompt = gr.Text(label="Global Prompt", interactive=True, scale=3)
978
  with gr.Row():
979
- s = gr.Slider(1, max_textboxes, value=1, step=1, label="Prompts:", interactive=True, scale=2)
980
- #s_mode = gr.Radio(["segmentation", "batch"], value="segmentation", interactive=True, scale=1, label="Generation Mode")
981
  with gr.Column():
982
- textboxes = []
983
- prompts = []
984
- repeats = []
985
- calcs = []
986
  with gr.Row():
987
- text0 = gr.Text(label="Input Text", interactive=True, scale=4)
988
- prompts.append(text0)
989
- drag0 = gr.Number(label="Repeat", value=1, interactive=True, scale=1)
990
- repeats.append(drag0)
991
- calc0 = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
992
- calcs.append(calc0)
993
  for i in range(max_textboxes):
994
- with gr.Row(visible=False) as t:
995
- text = gr.Text(label="Input Text", interactive=True, scale=3)
996
- repeat = gr.Number(label="Repeat", minimum=1, value=1, interactive=True, scale=1)
997
- calc = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
998
- textboxes.append(t)
999
- prompts.append(text)
1000
- repeats.append(repeat)
1001
- calcs.append(calc)
1002
- to_calc = gr.Button("Calculate Timings", variant="secondary")
1003
  with gr.Row():
1004
- duration = gr.Slider(minimum=1, maximum=300, value=10, step=1, label="Duration", interactive=True)
1005
  with gr.Row():
1006
- overlap = gr.Slider(minimum=1, maximum=29, value=12, step=1, label="Overlap", interactive=True)
1007
  with gr.Row():
1008
- seed = gr.Number(label="Seed", value=-1, scale=4, precision=0, interactive=True)
1009
- gr.Button('\U0001f3b2\ufe0f', scale=1).click(fn=lambda: -1, outputs=[seed], queue=False)
1010
- reuse_seed = gr.Button('\u267b\ufe0f', scale=1)
1011
 
1012
  with gr.Tab("Audio"):
1013
  with gr.Row():
1014
  with gr.Column():
1015
- input_type = gr.Radio(["file", "mic"], value="file", label="Input Type (optional)", interactive=True)
1016
- mode = gr.Radio(["melody", "sample"], label="Input Audio Mode (optional)", value="sample", interactive=True)
1017
  with gr.Row():
1018
- trim_start = gr.Number(label="Trim Start", value=0, interactive=True)
1019
- trim_end = gr.Number(label="Trim End", value=0, interactive=True)
1020
- audio = gr.Audio(source="upload", type="numpy", label="Input Audio (optional)", interactive=True)
1021
 
1022
  with gr.Tab("Customization"):
1023
  with gr.Row():
1024
  with gr.Column():
1025
- background = gr.ColorPicker(value="#0f0f0f", label="background color", interactive=True, scale=0)
1026
- bar1 = gr.ColorPicker(value="#84cc16", label="bar color start", interactive=True, scale=0)
1027
- bar2 = gr.ColorPicker(value="#10b981", label="bar color end", interactive=True, scale=0)
1028
  with gr.Column():
1029
- image = gr.Image(label="Background Image", type="filepath", interactive=True, scale=4)
1030
  with gr.Row():
1031
- height = gr.Number(label="Height", value=512, interactive=True)
1032
- width = gr.Number(label="Width", value=768, interactive=True)
1033
 
1034
  with gr.Tab("Settings"):
1035
  with gr.Row():
1036
- channel = gr.Radio(["mono", "stereo", "stereo effect"], label="Output Audio Channels", value="stereo", interactive=True, scale=1)
1037
- sr_select = gr.Dropdown(["11025", "16000", "22050", "24000", "32000", "44100", "48000"], label="Output Audio Sample Rate", value="48000", interactive=True)
1038
- with gr.Row():
1039
- model = gr.Radio(["melody", "small", "medium", "large", "custom"], label="Model", value="large", interactive=True, scale=1)
1040
- with gr.Column():
1041
- dropdown = gr.Dropdown(choices=get_available_models(), value=("No models found" if len(get_available_models()) < 1 else get_available_models()[0]), label='Custom Model (models folder)', elem_classes='slim-dropdown', interactive=True)
1042
- ui.create_refresh_button(dropdown, lambda: None, lambda: {'choices': get_available_models()}, 'refresh-button')
1043
- basemodel = gr.Radio(["small", "medium", "melody", "large"], label="Base Model", value="medium", interactive=True, scale=1)
1044
  with gr.Row():
1045
- decoder = gr.Radio(["Default", "MultiBand_Diffusion"], label="Decoder", value="Default", interactive=True)
 
1046
  with gr.Row():
1047
- topk = gr.Number(label="Top-k", value=250, interactive=True)
1048
- topp = gr.Number(label="Top-p", value=0, interactive=True)
1049
- temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
1050
- cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
1051
  with gr.Row():
1052
- submit = gr.Button("Generate", variant="primary")
1053
- # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
1054
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
1055
- with gr.Column() as c:
1056
  with gr.Tab("Output"):
1057
- output = gr.Video(label="Generated Music", scale=0)
1058
  with gr.Row():
1059
- audio_only = gr.Audio(type="numpy", label="Audio Only", interactive=False)
1060
- backup_only = gr.Audio(type="numpy", label="Backup Audio", interactive=False, visible=False)
1061
- send_audio = gr.Button("Send to Input Audio")
1062
- seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
1063
- download = gr.File(label="Generated Files", interactive=False)
1064
  with gr.Tab("Wiki"):
1065
  gr.Markdown(
1066
  """
1067
  - **[Generate (button)]:**
1068
- Generates the music with the given settings and prompts.
1069
 
1070
  - **[Interrupt (button)]:**
1071
- Stops the music generation as soon as it can, providing an incomplete output.
1072
 
1073
  ---
1074
 
@@ -1082,30 +1077,21 @@ def ui_full(launch_kwargs):
1082
  - **[Structure Prompts (checkbox)]:**
1083
  Enable/Disable the structure prompts feature.
1084
 
1085
- - **[BPM (number)]:**
1086
- Beats per minute of the generated music.
1087
-
1088
- - **[Key (dropdown)]:**
1089
- The key of the generated music.
1090
-
1091
- - **[Scale (dropdown)]:**
1092
- The scale of the generated music.
1093
-
1094
  - **[Global Prompt (text)]:**
1095
  Here write the prompt that you wish to be used for all prompt segments.
1096
 
1097
  #### Multi-Prompt:
1098
 
1099
- This feature allows you to control the music, adding variation to different time segments.
1100
- You have up to 10 prompt segments. the first prompt will always be 30s long
1101
- the other prompts will be [30s - overlap].
1102
- for example if the overlap is 10s, each prompt segment will be 20s.
1103
 
1104
  - **[Prompt Segments (number)]:**
1105
- Amount of unique prompt to generate throughout the music generation.
1106
 
1107
  - **[Prompt/Input Text (prompt)]:**
1108
- Here describe the music you wish the model to generate.
1109
 
1110
  - **[Repeat (number)]:**
1111
  Write how many times this prompt will repeat (instead of wasting another prompt segment on the same prompt).
@@ -1117,15 +1103,15 @@ def ui_full(launch_kwargs):
1117
  Calculates the timings of the prompt segments.
1118
 
1119
  - **[Duration (number)]:**
1120
- How long you want the generated music to be (in seconds).
1121
 
1122
  - **[Overlap (number)]:**
1123
  How much each new segment will reference the previous segment (in seconds).
1124
- For example, if you choose 20s: Each new segment after the first one will reference the previous segment 20s
1125
- and will generate only 10s of new music. The model can only process 30s of music.
1126
 
1127
  - **[Seed (number)]:**
1128
- Your generated music id. If you wish to generate the exact same music,
1129
  place the exact seed with the exact prompts
1130
  (This way you can also extend specific song that was generated short).
1131
 
@@ -1143,16 +1129,12 @@ def ui_full(launch_kwargs):
1143
  `File` mode allows you to upload an audio file to use as input
1144
  `Mic` mode allows you to use your microphone as input
1145
 
1146
- - **[Input Audio Mode (selection)]:**
1147
- `Melody` mode only works with the melody model: it conditions the music generation to reference the melody
1148
- `Sample` mode works with any model: it gives a music sample to the model to generate its continuation.
1149
-
1150
  - **[Trim Start and Trim End (numbers)]:**
1151
  `Trim Start` set how much you'd like to trim the input audio from the start
1152
  `Trim End` same as the above but from the end
1153
 
1154
  - **[Input Audio (audio file)]:**
1155
- Input here the audio you wish to use with "melody" or "sample" mode.
1156
 
1157
  ---
1158
 
@@ -1187,29 +1169,6 @@ def ui_full(launch_kwargs):
1187
  - **[Output Audio Sample Rate (dropdown)]:**
1188
  The output audio sample rate, the model default is 32000.
1189
 
1190
- - **[Model (selection)]:**
1191
- Here you can choose which model you wish to use:
1192
- `melody` model is based on the medium model with a unique feature that lets you use melody conditioning
1193
- `small` model is trained on 300M parameters
1194
- `medium` model is trained on 1.5B parameters
1195
- `large` model is trained on 3.3B parameters
1196
- `custom` model runs the custom model that you provided.
1197
-
1198
- - **[Custom Model (selection)]:**
1199
- This dropdown will show you models that are placed in the `models` folder
1200
- you must select `custom` in the model options in order to use it.
1201
-
1202
- - **[Refresh (button)]:**
1203
- Refreshes the dropdown list for custom model.
1204
-
1205
- - **[Base Model (selection)]:**
1206
- Choose here the model that your custom model is based on.
1207
-
1208
- - **[Decoder (selection)]:**
1209
- Choose here the decoder that you wish to use:
1210
- `Default` is the default decoder
1211
- `MultiBand_Diffusion` is a decoder that uses diffusion to generate the audio.
1212
-
1213
  - **[Top-k (number)]:**
1214
  is a parameter used in text generation models, including music generation models. It determines the number of most likely next tokens to consider at each step of the generation process. The model ranks all possible tokens based on their predicted probabilities, and then selects the top-k tokens from the ranked list. The model then samples from this reduced set of tokens to determine the next token in the generated sequence. A smaller value of k results in a more focused and deterministic output, while a larger value of k allows for more diversity in the generated music.
1215
 
@@ -1223,106 +1182,118 @@ def ui_full(launch_kwargs):
1223
  refers to a technique used in some music generation models where a separate classifier network is trained to provide guidance or control over the generated music. This classifier is trained on labeled data to recognize specific musical characteristics or styles. During the generation process, the output of the generator model is evaluated by the classifier, and the generator is encouraged to produce music that aligns with the desired characteristics or style. This approach allows for more fine-grained control over the generated music, enabling users to specify certain attributes they want the model to capture.
1224
  """
1225
  )
1226
- with gr.Tab("AudioGen"):
1227
  gr.Markdown(
1228
  """
1229
- ### AudioGen
1230
  """
1231
  )
1232
  with gr.Row():
1233
  with gr.Column():
1234
  with gr.Tab("Generation"):
1235
  with gr.Accordion("Structure Prompts", open=False):
1236
- with gr.Row():
1237
- struc_prompts_a = gr.Checkbox(label="Enable", value=False, interactive=True, container=False)
1238
- global_prompt_a = gr.Text(label="Global Prompt", interactive=True, scale=3)
 
 
 
 
 
1239
  with gr.Row():
1240
- s_a = gr.Slider(1, max_textboxes, value=1, step=1, label="Prompts:", interactive=True, scale=2)
 
1241
  with gr.Column():
1242
- textboxes_a = []
1243
- prompts_a = []
1244
- repeats_a = []
1245
- calcs_a = []
1246
  with gr.Row():
1247
- text0_a = gr.Text(label="Input Text", interactive=True, scale=4)
1248
- prompts_a.append(text0_a)
1249
- drag0_a = gr.Number(label="Repeat", value=1, interactive=True, scale=1)
1250
- repeats_a.append(drag0_a)
1251
- calc0_a = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
1252
- calcs_a.append(calc0_a)
1253
  for i in range(max_textboxes):
1254
- with gr.Row(visible=False) as t_a:
1255
- text_a = gr.Text(label="Input Text", interactive=True, scale=3)
1256
- repeat_a = gr.Number(label="Repeat", minimum=1, value=1, interactive=True, scale=1)
1257
- calc_a = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
1258
- textboxes_a.append(t_a)
1259
- prompts_a.append(text_a)
1260
- repeats_a.append(repeat_a)
1261
- calcs_a.append(calc_a)
1262
- to_calc_a = gr.Button("Calculate Timings", variant="secondary")
1263
  with gr.Row():
1264
- duration_a = gr.Slider(minimum=1, maximum=300, value=10, step=1, label="Duration", interactive=True)
1265
  with gr.Row():
1266
- overlap_a = gr.Slider(minimum=1, maximum=9, value=2, step=1, label="Overlap", interactive=True)
1267
  with gr.Row():
1268
- seed_a = gr.Number(label="Seed", value=-1, scale=4, precision=0, interactive=True)
1269
- gr.Button('\U0001f3b2\ufe0f', scale=1).click(fn=lambda: -1, outputs=[seed_a], queue=False)
1270
- reuse_seed_a = gr.Button('\u267b\ufe0f', scale=1)
1271
 
1272
  with gr.Tab("Audio"):
1273
  with gr.Row():
1274
  with gr.Column():
1275
- input_type_a = gr.Radio(["file", "mic"], value="file", label="Input Type (optional)", interactive=True)
1276
- mode_a = gr.Radio(["sample"], label="Input Audio Mode (optional)", value="sample", interactive=False, visible=False)
1277
  with gr.Row():
1278
- trim_start_a = gr.Number(label="Trim Start", value=0, interactive=True)
1279
- trim_end_a = gr.Number(label="Trim End", value=0, interactive=True)
1280
- audio_a = gr.Audio(source="upload", type="numpy", label="Input Audio (optional)", interactive=True)
1281
 
1282
  with gr.Tab("Customization"):
1283
  with gr.Row():
1284
  with gr.Column():
1285
- background_a = gr.ColorPicker(value="#0f0f0f", label="background color", interactive=True, scale=0)
1286
- bar1_a = gr.ColorPicker(value="#84cc16", label="bar color start", interactive=True, scale=0)
1287
- bar2_a = gr.ColorPicker(value="#10b981", label="bar color end", interactive=True, scale=0)
1288
  with gr.Column():
1289
- image_a = gr.Image(label="Background Image", type="filepath", interactive=True, scale=4)
1290
  with gr.Row():
1291
- height_a = gr.Number(label="Height", value=512, interactive=True)
1292
- width_a = gr.Number(label="Width", value=768, interactive=True)
1293
 
1294
  with gr.Tab("Settings"):
1295
  with gr.Row():
1296
- channel_a = gr.Radio(["mono", "stereo", "stereo effect"], label="Output Audio Channels", value="stereo", interactive=True, scale=1)
1297
- sr_select_a = gr.Dropdown(["11025", "16000", "22050", "24000", "32000", "44100", "48000"], label="Output Audio Sample Rate", value="48000", interactive=True)
1298
  with gr.Row():
1299
- model_a = gr.Radio(["medium"], label="Model", value="medium", interactive=False, visible=False)
1300
- decoder_a = gr.Radio(["Default"], label="Decoder", value="Default", interactive=False, visible=False)
 
 
 
1301
  with gr.Row():
1302
- topk_a = gr.Number(label="Top-k", value=250, interactive=True)
1303
- topp_a = gr.Number(label="Top-p", value=0, interactive=True)
1304
- temperature_a = gr.Number(label="Temperature", value=1.0, interactive=True)
1305
- cfg_coef_a = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
 
 
1306
  with gr.Row():
1307
- submit_a = gr.Button("Generate", variant="primary")
 
1308
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
1309
- with gr.Column():
1310
  with gr.Tab("Output"):
1311
- output_a = gr.Video(label="Generated Audio", scale=0)
1312
  with gr.Row():
1313
- audio_only_a = gr.Audio(type="numpy", label="Audio Only", interactive=False)
1314
- backup_only_a = gr.Audio(type="numpy", label="Backup Audio", interactive=False, visible=False)
1315
- send_audio_a = gr.Button("Send to Input Audio")
1316
- seed_used_a = gr.Number(label='Seed used', value=-1, interactive=False)
1317
- download_a = gr.File(label="Generated Files", interactive=False)
1318
  with gr.Tab("Wiki"):
1319
  gr.Markdown(
1320
  """
1321
  - **[Generate (button)]:**
1322
- Generates the audio with the given settings and prompts.
1323
 
1324
  - **[Interrupt (button)]:**
1325
- Stops the audio generation as soon as it can, providing an incomplete output.
1326
 
1327
  ---
1328
 
@@ -1336,21 +1307,30 @@ def ui_full(launch_kwargs):
1336
  - **[Structure Prompts (checkbox)]:**
1337
  Enable/Disable the structure prompts feature.
1338
 
 
 
 
 
 
 
 
 
 
1339
  - **[Global Prompt (text)]:**
1340
  Here write the prompt that you wish to be used for all prompt segments.
1341
 
1342
  #### Multi-Prompt:
1343
 
1344
- This feature allows you to control the audio, adding variation to different time segments.
1345
- You have up to 10 prompt segments. the first prompt will always be 10s long
1346
- the other prompts will be [10s - overlap].
1347
- for example if the overlap is 2s, each prompt segment will be 8s.
1348
 
1349
  - **[Prompt Segments (number)]:**
1350
- Amount of unique prompt to generate throughout the audio generation.
1351
 
1352
  - **[Prompt/Input Text (prompt)]:**
1353
- Here describe the audio you wish the model to generate.
1354
 
1355
  - **[Repeat (number)]:**
1356
  Write how many times this prompt will repeat (instead of wasting another prompt segment on the same prompt).
@@ -1362,15 +1342,15 @@ def ui_full(launch_kwargs):
1362
  Calculates the timings of the prompt segments.
1363
 
1364
  - **[Duration (number)]:**
1365
- How long you want the generated audio to be (in seconds).
1366
 
1367
  - **[Overlap (number)]:**
1368
  How much each new segment will reference the previous segment (in seconds).
1369
- For example, if you choose 2s: Each new segment after the first one will reference the previous segment 2s
1370
- and will generate only 8s of new audio. The model can only process 10s of music.
1371
 
1372
  - **[Seed (number)]:**
1373
- Your generated audio id. If you wish to generate the exact same audio,
1374
  place the exact seed with the exact prompts
1375
  (This way you can also extend specific song that was generated short).
1376
 
@@ -1388,12 +1368,16 @@ def ui_full(launch_kwargs):
1388
  `File` mode allows you to upload an audio file to use as input
1389
  `Mic` mode allows you to use your microphone as input
1390
 
 
 
 
 
1391
  - **[Trim Start and Trim End (numbers)]:**
1392
  `Trim Start` set how much you'd like to trim the input audio from the start
1393
  `Trim End` same as the above but from the end
1394
 
1395
  - **[Input Audio (audio file)]:**
1396
- Input here the audio you wish to use.
1397
 
1398
  ---
1399
 
@@ -1428,6 +1412,29 @@ def ui_full(launch_kwargs):
1428
  - **[Output Audio Sample Rate (dropdown)]:**
1429
  The output audio sample rate, the model default is 32000.
1430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1431
  - **[Top-k (number)]:**
1432
  is a parameter used in text generation models, including music generation models. It determines the number of most likely next tokens to consider at each step of the generation process. The model ranks all possible tokens based on their predicted probabilities, and then selects the top-k tokens from the ranked list. The model then samples from this reduced set of tokens to determine the next token in the generated sequence. A smaller value of k results in a more focused and deterministic output, while a larger value of k allows for more diversity in the generated music.
1433
 
 
188
  return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted. (Discord removes metadata from mp4 and wav files, so you can't use them)"
189
  json_string = song.tags['COMMENT'][0]
190
  data = json.loads(json_string)
191
+
192
  global_prompt = str("\nGlobal Prompt: " + (data['global_prompt'] if data['global_prompt'] != "" else "none")) if 'global_prompt' in data else ""
193
  bpm = str("\nBPM: " + data['bpm']) if 'bpm' in data else ""
194
  key = str("\nKey: " + data['key']) if 'key' in data else ""
 
213
  version = str("Version: " + data['version']) if 'version' in data else "Version: Unknown"
214
  info = str(version + global_prompt + bpm + key + scale + prompts + duration + overlap + seed + audio_mode + input_length + channel + sr_select + model + custom_model + base_model + decoder + topk + topp + temperature + cfg_coef)
215
  if info == "":
216
 + return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted. (Discord removes metadata from mp4 and wav files, so you can't use them)"
218
  return info
219
  else:
220
  with open(audio_path.name) as json_file:
221
  data = json.load(json_file)
222
  #if 'global_prompt' not in data:
223
 + #return "No tags found. Either the file is not generated by MusicGen+ V1.2.8a and higher or the tags are corrupted."
225
  global_prompt = str("\nGlobal Prompt: " + (data['global_prompt'] if data['global_prompt'] != "" else "none")) if 'global_prompt' in data else ""
226
  bpm = str("\nBPM: " + data['bpm']) if 'bpm' in data else ""
227
  key = str("\nKey: " + data['key']) if 'key' in data else ""
 
246
  version = str("Version: " + data['version']) if 'version' in data else "Version: Unknown"
247
  info = str(version + global_prompt + bpm + key + scale + prompts + duration + overlap + seed + audio_mode + input_length + channel + sr_select + model + custom_model + base_model + decoder + topk + topp + temperature + cfg_coef)
248
  if info == "":
249
 + return "No tags found. Either the file is not generated by MusicGen+ V1.2.7 and higher or the tags are corrupted."
251
  return info
252
  else:
253
  return "Only .wav ,.mp4 and .json files are supported"
 
471
  global MBD
472
  if MBD is None:
473
  print("loading MBD")
474
 + MBD = MultiBandDiffusion.get_mbd_musicgen()
476
 
477
 
478
  def unload_diffusion():
 
857
 
858
  if gen_type == "music":
859
  model_shrt = model
860
 + model = "GrandaddyShmax/musicgen-" + model
862
  elif gen_type == "audio":
863
  model_shrt = model
864
  model = "GrandaddyShmax/audiogen-" + model
865
  base_model_shrt = base_model
866
 + base_model = "GrandaddyShmax/musicgen-" + base_model
868
 
869
  if MODEL is None or MODEL.name != (model):
870
  load_model(model, custom_model, base_model, gen_type)
 
964
  Welcome to Soundscapes - TulipAI’s flagship Audio Storytelling Toolkit. Designed with modern content creators in mind, our AI-driven platform generates audio sound effects in just minutes tailored to your unique needs.
965
  """
966
  )
967
+ with gr.Tab("AudioGen"):
968
  gr.Markdown(
969
  """
970
+ ### AudioGen
971
  """
972
  )
973
  with gr.Row():
974
  with gr.Column():
975
  with gr.Tab("Generation"):
976
  with gr.Accordion("Structure Prompts", open=False):
977
+ with gr.Row():
978
+ struc_prompts_a = gr.Checkbox(label="Enable", value=False, interactive=True, container=False)
979
+ global_prompt_a = gr.Text(label="Global Prompt", interactive=True, scale=3)
 
 
 
 
 
980
  with gr.Row():
981
+ s_a = gr.Slider(1, max_textboxes, value=1, step=1, label="Prompts:", interactive=True, scale=2)
 
982
  with gr.Column():
983
+ textboxes_a = []
984
+ prompts_a = []
985
+ repeats_a = []
986
+ calcs_a = []
987
  with gr.Row():
988
+ text0_a = gr.Text(label="Input Text", interactive=True, scale=4)
989
+ prompts_a.append(text0_a)
990
+ drag0_a = gr.Number(label="Repeat", value=1, interactive=True, scale=1)
991
+ repeats_a.append(drag0_a)
992
+ calc0_a = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
993
+ calcs_a.append(calc0_a)
994
  for i in range(max_textboxes):
995
+ with gr.Row(visible=False) as t_a:
996
+ text_a = gr.Text(label="Input Text", interactive=True, scale=3)
997
+ repeat_a = gr.Number(label="Repeat", minimum=1, value=1, interactive=True, scale=1)
998
+ calc_a = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
999
+ textboxes_a.append(t_a)
1000
+ prompts_a.append(text_a)
1001
+ repeats_a.append(repeat_a)
1002
+ calcs_a.append(calc_a)
1003
+ to_calc_a = gr.Button("Calculate Timings", variant="secondary")
1004
  with gr.Row():
1005
+ duration_a = gr.Slider(minimum=1, maximum=300, value=10, step=1, label="Duration", interactive=True)
1006
  with gr.Row():
1007
+ overlap_a = gr.Slider(minimum=1, maximum=9, value=2, step=1, label="Overlap", interactive=True)
1008
  with gr.Row():
1009
+ seed_a = gr.Number(label="Seed", value=-1, scale=4, precision=0, interactive=True)
1010
+ gr.Button('\U0001f3b2\ufe0f', scale=1).click(fn=lambda: -1, outputs=[seed_a], queue=False)
1011
+ reuse_seed_a = gr.Button('\u267b\ufe0f', scale=1)
1012
 
1013
  with gr.Tab("Audio"):
1014
  with gr.Row():
1015
  with gr.Column():
1016
+ input_type_a = gr.Radio(["file", "mic"], value="file", label="Input Type (optional)", interactive=True)
1017
+ mode_a = gr.Radio(["sample"], label="Input Audio Mode (optional)", value="sample", interactive=False, visible=False)
1018
  with gr.Row():
1019
+ trim_start_a = gr.Number(label="Trim Start", value=0, interactive=True)
1020
+ trim_end_a = gr.Number(label="Trim End", value=0, interactive=True)
1021
+ audio_a = gr.Audio(source="upload", type="numpy", label="Input Audio (optional)", interactive=True)
1022
 
1023
  with gr.Tab("Customization"):
1024
  with gr.Row():
1025
  with gr.Column():
1026
+ background_a = gr.ColorPicker(value="#0f0f0f", label="background color", interactive=True, scale=0)
1027
+ bar1_a = gr.ColorPicker(value="#84cc16", label="bar color start", interactive=True, scale=0)
1028
+ bar2_a = gr.ColorPicker(value="#10b981", label="bar color end", interactive=True, scale=0)
1029
  with gr.Column():
1030
+ image_a = gr.Image(label="Background Image", type="filepath", interactive=True, scale=4)
1031
  with gr.Row():
1032
+ height_a = gr.Number(label="Height", value=512, interactive=True)
1033
+ width_a = gr.Number(label="Width", value=768, interactive=True)
1034
 
1035
  with gr.Tab("Settings"):
1036
  with gr.Row():
1037
+ channel_a = gr.Radio(["mono", "stereo", "stereo effect"], label="Output Audio Channels", value="stereo", interactive=True, scale=1)
1038
+ sr_select_a = gr.Dropdown(["11025", "16000", "22050", "24000", "32000", "44100", "48000"], label="Output Audio Sample Rate", value="48000", interactive=True)
 
 
 
 
 
 
1039
  with gr.Row():
1040
+ model_a = gr.Radio(["medium"], label="Model", value="medium", interactive=False, visible=False)
1041
+ decoder_a = gr.Radio(["Default"], label="Decoder", value="Default", interactive=False, visible=False)
1042
  with gr.Row():
1043
+ topk_a = gr.Number(label="Top-k", value=250, interactive=True)
1044
+ topp_a = gr.Number(label="Top-p", value=0, interactive=True)
1045
+ temperature_a = gr.Number(label="Temperature", value=1.0, interactive=True)
1046
+ cfg_coef_a = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
1047
  with gr.Row():
1048
+ submit_a = gr.Button("Generate", variant="primary")
 
1049
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
1050
+ with gr.Column():
1051
  with gr.Tab("Output"):
1052
+ output_a = gr.Video(label="Generated Audio", scale=0)
1053
  with gr.Row():
1054
+ audio_only_a = gr.Audio(type="numpy", label="Audio Only", interactive=False)
1055
+ backup_only_a = gr.Audio(type="numpy", label="Backup Audio", interactive=False, visible=False)
1056
+ send_audio_a = gr.Button("Send to Input Audio")
1057
+ seed_used_a = gr.Number(label='Seed used', value=-1, interactive=False)
1058
+ download_a = gr.File(label="Generated Files", interactive=False)
1059
  with gr.Tab("Wiki"):
1060
  gr.Markdown(
1061
  """
1062
  - **[Generate (button)]:**
1063
+ Generates the audio with the given settings and prompts.
1064
 
1065
  - **[Interrupt (button)]:**
1066
+ Stops the audio generation as soon as it can, providing an incomplete output.
1067
 
1068
  ---
1069
 
 
1077
  - **[Structure Prompts (checkbox)]:**
1078
  Enable/Disable the structure prompts feature.
1079
 
 
 
 
 
 
 
 
 
 
1080
  - **[Global Prompt (text)]:**
1081
  Here write the prompt that you wish to be used for all prompt segments.
1082
 
1083
  #### Multi-Prompt:
1084
 
1085
+ This feature allows you to control the audio, adding variation to different time segments.
1086
+ You have up to 10 prompt segments. The first prompt will always be 10s long;
1087
+ the other prompts will be [10s - overlap].
1088
+ For example, if the overlap is 2s, each prompt segment will be 8s.
1089
 
1090
  - **[Prompt Segments (number)]:**
1091
+ Number of unique prompts to generate throughout the audio generation.
1092
 
1093
  - **[Prompt/Input Text (prompt)]:**
1094
+ Here describe the audio you wish the model to generate.
1095
 
1096
  - **[Repeat (number)]:**
1097
  Write how many times this prompt will repeat (instead of wasting another prompt segment on the same prompt).
 
1103
  Calculates the timings of the prompt segments.
1104
 
1105
  - **[Duration (number)]:**
1106
+ How long you want the generated audio to be (in seconds).
1107
 
1108
  - **[Overlap (number)]:**
1109
  How much each new segment will reference the previous segment (in seconds).
1110
+ For example, if you choose 2s: each new segment after the first one will reference the last 2s of the previous segment
1111
+ and will generate only 8s of new audio. The model can only process 10s of audio at a time.
1112
 
1113
  - **[Seed (number)]:**
1114
+ Your generated audio ID. If you wish to generate the exact same audio,
1115
  place the exact seed with the exact prompts
1116
  (This way you can also extend specific song that was generated short).
1117
 
 
1129
  `File` mode allows you to upload an audio file to use as input
1130
  `Mic` mode allows you to use your microphone as input
1131
 
 
 
 
 
1132
  - **[Trim Start and Trim End (numbers)]:**
1133
  `Trim Start` set how much you'd like to trim the input audio from the start
1134
  `Trim End` same as the above but from the end
1135
 
1136
  - **[Input Audio (audio file)]:**
1137
+ Input here the audio you wish to use.
1138
 
1139
  ---
1140
 
 
1169
  - **[Output Audio Sample Rate (dropdown)]:**
1170
  The output audio sample rate, the model default is 32000.
1171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1172
  - **[Top-k (number)]:**
1173
  is a parameter used in text generation models, including music generation models. It determines the number of most likely next tokens to consider at each step of the generation process. The model ranks all possible tokens based on their predicted probabilities, and then selects the top-k tokens from the ranked list. The model then samples from this reduced set of tokens to determine the next token in the generated sequence. A smaller value of k results in a more focused and deterministic output, while a larger value of k allows for more diversity in the generated music.
1174
 
 
1182
  refers to a technique used in some music generation models where a separate classifier network is trained to provide guidance or control over the generated music. This classifier is trained on labeled data to recognize specific musical characteristics or styles. During the generation process, the output of the generator model is evaluated by the classifier, and the generator is encouraged to produce music that aligns with the desired characteristics or style. This approach allows for more fine-grained control over the generated music, enabling users to specify certain attributes they want the model to capture.
1183
  """
1184
  )
1185
+ with gr.Tab("MusicGen"):
1186
  gr.Markdown(
1187
  """
1188
+ ### MusicGen
1189
  """
1190
  )
1191
  with gr.Row():
1192
  with gr.Column():
1193
  with gr.Tab("Generation"):
1194
  with gr.Accordion("Structure Prompts", open=False):
1195
+ with gr.Column():
1196
+ with gr.Row():
1197
+ struc_prompts = gr.Checkbox(label="Enable", value=False, interactive=True, container=False)
1198
+ bpm = gr.Number(label="BPM", value=120, interactive=True, scale=1, precision=0)
1199
+ key = gr.Dropdown(["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "Bb", "B"], label="Key", value="C", interactive=True)
1200
+ scale = gr.Dropdown(["Major", "Minor"], label="Scale", value="Major", interactive=True)
1201
+ with gr.Row():
1202
+ global_prompt = gr.Text(label="Global Prompt", interactive=True, scale=3)
1203
  with gr.Row():
1204
+ s = gr.Slider(1, max_textboxes, value=1, step=1, label="Prompts:", interactive=True, scale=2)
1205
+ #s_mode = gr.Radio(["segmentation", "batch"], value="segmentation", interactive=True, scale=1, label="Generation Mode")
1206
  with gr.Column():
1207
+ textboxes = []
1208
+ prompts = []
1209
+ repeats = []
1210
+ calcs = []
1211
  with gr.Row():
1212
+ text0 = gr.Text(label="Input Text", interactive=True, scale=4)
1213
+ prompts.append(text0)
1214
+ drag0 = gr.Number(label="Repeat", value=1, interactive=True, scale=1)
1215
+ repeats.append(drag0)
1216
+ calc0 = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
1217
+ calcs.append(calc0)
1218
  for i in range(max_textboxes):
1219
+ with gr.Row(visible=False) as t:
1220
+ text = gr.Text(label="Input Text", interactive=True, scale=3)
1221
+ repeat = gr.Number(label="Repeat", minimum=1, value=1, interactive=True, scale=1)
1222
+ calc = gr.Text(interactive=False, value="00:00 - 00:00", scale=1, label="Time")
1223
+ textboxes.append(t)
1224
+ prompts.append(text)
1225
+ repeats.append(repeat)
1226
+ calcs.append(calc)
1227
+ to_calc = gr.Button("Calculate Timings", variant="secondary")
1228
  with gr.Row():
1229
+ duration = gr.Slider(minimum=1, maximum=300, value=10, step=1, label="Duration", interactive=True)
1230
  with gr.Row():
1231
+ overlap = gr.Slider(minimum=1, maximum=29, value=12, step=1, label="Overlap", interactive=True)
1232
  with gr.Row():
1233
+ seed = gr.Number(label="Seed", value=-1, scale=4, precision=0, interactive=True)
1234
+ gr.Button('\U0001f3b2\ufe0f', scale=1).click(fn=lambda: -1, outputs=[seed], queue=False)
1235
+ reuse_seed = gr.Button('\u267b\ufe0f', scale=1)
1236
 
1237
  with gr.Tab("Audio"):
1238
  with gr.Row():
1239
  with gr.Column():
1240
+ input_type = gr.Radio(["file", "mic"], value="file", label="Input Type (optional)", interactive=True)
1241
+ mode = gr.Radio(["melody", "sample"], label="Input Audio Mode (optional)", value="sample", interactive=True)
1242
  with gr.Row():
1243
+ trim_start = gr.Number(label="Trim Start", value=0, interactive=True)
1244
+ trim_end = gr.Number(label="Trim End", value=0, interactive=True)
1245
+ audio = gr.Audio(source="upload", type="numpy", label="Input Audio (optional)", interactive=True)
1246
 
1247
  with gr.Tab("Customization"):
1248
  with gr.Row():
1249
  with gr.Column():
1250
+ background = gr.ColorPicker(value="#0f0f0f", label="background color", interactive=True, scale=0)
1251
+ bar1 = gr.ColorPicker(value="#84cc16", label="bar color start", interactive=True, scale=0)
1252
+ bar2 = gr.ColorPicker(value="#10b981", label="bar color end", interactive=True, scale=0)
1253
  with gr.Column():
1254
+ image = gr.Image(label="Background Image", type="filepath", interactive=True, scale=4)
1255
  with gr.Row():
1256
+ height = gr.Number(label="Height", value=512, interactive=True)
1257
+ width = gr.Number(label="Width", value=768, interactive=True)
1258
 
1259
  with gr.Tab("Settings"):
1260
  with gr.Row():
1261
+ channel = gr.Radio(["mono", "stereo", "stereo effect"], label="Output Audio Channels", value="stereo", interactive=True, scale=1)
1262
+ sr_select = gr.Dropdown(["11025", "16000", "22050", "24000", "32000", "44100", "48000"], label="Output Audio Sample Rate", value="48000", interactive=True)
1263
  with gr.Row():
1264
+ model = gr.Radio(["melody", "small", "medium", "large", "custom"], label="Model", value="large", interactive=True, scale=1)
1265
+ with gr.Column():
1266
+ dropdown = gr.Dropdown(choices=get_available_models(), value=("No models found" if len(get_available_models()) < 1 else get_available_models()[0]), label='Custom Model (models folder)', elem_classes='slim-dropdown', interactive=True)
1267
+ ui.create_refresh_button(dropdown, lambda: None, lambda: {'choices': get_available_models()}, 'refresh-button')
1268
+ basemodel = gr.Radio(["small", "medium", "melody", "large"], label="Base Model", value="medium", interactive=True, scale=1)
1269
  with gr.Row():
1270
+ decoder = gr.Radio(["Default", "MultiBand_Diffusion"], label="Decoder", value="Default", interactive=True)
1271
+ with gr.Row():
1272
+ topk = gr.Number(label="Top-k", value=250, interactive=True)
1273
+ topp = gr.Number(label="Top-p", value=0, interactive=True)
1274
+ temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
1275
+ cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
1276
  with gr.Row():
1277
+ submit = gr.Button("Generate", variant="primary")
1278
+ # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
1279
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
1280
+ with gr.Column() as c:
1281
  with gr.Tab("Output"):
1282
+ output = gr.Video(label="Generated Music", scale=0)
1283
  with gr.Row():
1284
+ audio_only = gr.Audio(type="numpy", label="Audio Only", interactive=False)
1285
+ backup_only = gr.Audio(type="numpy", label="Backup Audio", interactive=False, visible=False)
1286
+ send_audio = gr.Button("Send to Input Audio")
1287
+ seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
1288
+ download = gr.File(label="Generated Files", interactive=False)
1289
  with gr.Tab("Wiki"):
1290
  gr.Markdown(
1291
  """
1292
  - **[Generate (button)]:**
1293
+ Generates the music with the given settings and prompts.
1294
 
1295
  - **[Interrupt (button)]:**
1296
+ Stops the music generation as soon as it can, providing an incomplete output.
1297
 
1298
  ---
1299
 
 
1307
  - **[Structure Prompts (checkbox)]:**
1308
  Enable/Disable the structure prompts feature.
1309
 
1310
+ - **[BPM (number)]:**
1311
+ Beats per minute of the generated music.
1312
+
1313
+ - **[Key (dropdown)]:**
1314
+ The key of the generated music.
1315
+
1316
+ - **[Scale (dropdown)]:**
1317
+ The scale of the generated music.
1318
+
1319
  - **[Global Prompt (text)]:**
1320
  Here write the prompt that you wish to be used for all prompt segments.
1321
 
1322
  #### Multi-Prompt:
1323
 
1324
+ This feature allows you to control the music, adding variation to different time segments.
1325
+ You have up to 10 prompt segments. The first prompt will always be 30s long;
1326
+ the other prompts will be [30s - overlap].
1327
+ For example, if the overlap is 10s, each prompt segment will be 20s.
1328
 
1329
  - **[Prompt Segments (number)]:**
1330
+ Number of unique prompts to generate throughout the music generation.
1331
 
1332
  - **[Prompt/Input Text (prompt)]:**
1333
+ Here describe the music you wish the model to generate.
1334
 
1335
  - **[Repeat (number)]:**
1336
  Write how many times this prompt will repeat (instead of wasting another prompt segment on the same prompt).
 
1342
  Calculates the timings of the prompt segments.
1343
 
1344
  - **[Duration (number)]:**
1345
+ How long you want the generated music to be (in seconds).
1346
 
1347
  - **[Overlap (number)]:**
1348
  How much each new segment will reference the previous segment (in seconds).
1349
+ For example, if you choose 20s: each new segment after the first one will reference the last 20s of the previous segment
1350
+ and will generate only 10s of new music. The model can only process 30s of music.
1351
 
1352
  - **[Seed (number)]:**
1353
+ Your generated music ID. If you wish to generate the exact same music,
1354
  place the exact seed with the exact prompts
1355
  (This way you can also extend specific song that was generated short).
1356
 
 
1368
  `File` mode allows you to upload an audio file to use as input
1369
  `Mic` mode allows you to use your microphone as input
1370
 
1371
+ - **[Input Audio Mode (selection)]:**
1372
+ `Melody` mode only works with the melody model: it conditions the music generation to reference the melody
1373
+ `Sample` mode works with any model: it gives a music sample to the model to generate its continuation.
1374
+
1375
  - **[Trim Start and Trim End (numbers)]:**
1376
  `Trim Start` set how much you'd like to trim the input audio from the start
1377
  `Trim End` same as the above but from the end
1378
 
1379
  - **[Input Audio (audio file)]:**
1380
+ Input here the audio you wish to use with "melody" or "sample" mode.
1381
 
1382
  ---
1383
 
 
1412
  - **[Output Audio Sample Rate (dropdown)]:**
1413
  The output audio sample rate, the model default is 32000.
1414
 
1415
+ - **[Model (selection)]:**
1416
+ Here you can choose which model you wish to use:
1417
+ `melody` model is based on the medium model with a unique feature that lets you use melody conditioning
1418
+ `small` model is trained on 300M parameters
1419
+ `medium` model is trained on 1.5B parameters
1420
+ `large` model is trained on 3.3B parameters
1421
+ `custom` model runs the custom model that you provided.
1422
+
1423
+ - **[Custom Model (selection)]:**
1424
+ This dropdown will show you models that are placed in the `models` folder
1425
+ you must select `custom` in the model options in order to use it.
1426
+
1427
+ - **[Refresh (button)]:**
1428
+ Refreshes the dropdown list for custom model.
1429
+
1430
+ - **[Base Model (selection)]:**
1431
+ Choose here the model that your custom model is based on.
1432
+
1433
+ - **[Decoder (selection)]:**
1434
+ Choose here the decoder that you wish to use:
1435
+ `Default` is the default decoder
1436
+ `MultiBand_Diffusion` is a decoder that uses diffusion to generate the audio.
1437
+
1438
  - **[Top-k (number)]:**
1439
  is a parameter used in text generation models, including music generation models. It determines the number of most likely next tokens to consider at each step of the generation process. The model ranks all possible tokens based on their predicted probabilities, and then selects the top-k tokens from the ranked list. The model then samples from this reduced set of tokens to determine the next token in the generated sequence. A smaller value of k results in a more focused and deterministic output, while a larger value of k allows for more diversity in the generated music.
1440