Spaces:
Build error
Build error
Update audio_foundation_models.py
Browse files
audio_foundation_models.py
CHANGED
|
@@ -71,7 +71,7 @@ def initialize_model_inpaint(config, ckpt):
|
|
| 71 |
sampler = DDIMSampler(model)
|
| 72 |
return sampler
|
| 73 |
def select_best_audio(prompt,wav_list):
|
| 74 |
-
clap_model = CLAPWrapper('useful_ckpts/CLAP/CLAP_weights_2022.pth','useful_ckpts/CLAP/config.yml',use_cuda=torch.cuda.is_available())
|
| 75 |
text_embeddings = clap_model.get_text_embeddings([prompt])
|
| 76 |
score_list = []
|
| 77 |
for data in wav_list:
|
|
@@ -132,7 +132,7 @@ class T2A:
|
|
| 132 |
def __init__(self, device):
|
| 133 |
print("Initializing Make-An-Audio to %s" % device)
|
| 134 |
self.device = device
|
| 135 |
-
self.sampler = initialize_model('configs/text-to-audio/txt2audio_args.yaml', 'useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
|
| 136 |
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
| 137 |
|
| 138 |
@prompts(name="Generate Audio From User Input Text",
|
|
@@ -185,8 +185,8 @@ class I2A:
|
|
| 185 |
def __init__(self, device):
|
| 186 |
print("Initializing Make-An-Audio-Image to %s" % device)
|
| 187 |
self.device = device
|
| 188 |
-
self.sampler = initialize_model('text_to_audio/… [removed line truncated during page extraction; the rest of the original path/arguments is not shown]
|
| 189 |
-
self.vocoder = VocoderBigVGAN('text_to_audio/… [removed line truncated during page extraction; the rest of the original path/arguments is not shown]
|
| 190 |
|
| 191 |
@prompts(name="Generate Audio From The Image",
|
| 192 |
description="useful for when you want to generate an audio "
|
|
@@ -345,8 +345,8 @@ class Inpaint:
|
|
| 345 |
def __init__(self, device):
|
| 346 |
print("Initializing Make-An-Audio-inpaint to %s" % device)
|
| 347 |
self.device = device
|
| 348 |
-
self.sampler = initialize_model_inpaint('text_to_audio/… [removed line truncated during page extraction; the rest of the original path/arguments is not shown]
|
| 349 |
-
self.vocoder = VocoderBigVGAN('… [removed line truncated during page extraction; the rest of the original path/arguments is not shown]
|
| 350 |
self.cmap_transform = matplotlib.cm.viridis
|
| 351 |
|
| 352 |
@prompts(name="Audio Inpainting",
|
|
|
|
| 71 |
sampler = DDIMSampler(model)
|
| 72 |
return sampler
|
| 73 |
def select_best_audio(prompt,wav_list):
|
| 74 |
+
clap_model = CLAPWrapper('text_to_audio/Make_An_Audio/useful_ckpts/CLAP/CLAP_weights_2022.pth','text_to_audio/Make_An_Audio/useful_ckpts/CLAP/config.yml',use_cuda=torch.cuda.is_available())
|
| 75 |
text_embeddings = clap_model.get_text_embeddings([prompt])
|
| 76 |
score_list = []
|
| 77 |
for data in wav_list:
|
|
|
|
| 132 |
def __init__(self, device):
|
| 133 |
print("Initializing Make-An-Audio to %s" % device)
|
| 134 |
self.device = device
|
| 135 |
+
self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/text-to-audio/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
|
| 136 |
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
| 137 |
|
| 138 |
@prompts(name="Generate Audio From User Input Text",
|
|
|
|
| 185 |
def __init__(self, device):
|
| 186 |
print("Initializing Make-An-Audio-Image to %s" % device)
|
| 187 |
self.device = device
|
| 188 |
+
self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
|
| 189 |
+
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
| 190 |
|
| 191 |
@prompts(name="Generate Audio From The Image",
|
| 192 |
description="useful for when you want to generate an audio "
|
|
|
|
| 345 |
def __init__(self, device):
|
| 346 |
print("Initializing Make-An-Audio-inpaint to %s" % device)
|
| 347 |
self.device = device
|
| 348 |
+
self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/inpaint7_epoch00047.ckpt')
|
| 349 |
+
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
| 350 |
self.cmap_transform = matplotlib.cm.viridis
|
| 351 |
|
| 352 |
@prompts(name="Audio Inpainting",
|