Spaces:

fffiloni
/

auffusion

Running on Zero

fffiloni committed on Feb 4

Commit

9710eda

verified ·

1 Parent(s): 3fe4f3e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
 import torch, os
 import numpy as np
 from PIL import Image
 import matplotlib.pyplot as plt
 from huggingface_hub import snapshot_download
@@ -14,6 +16,18 @@ from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormal
 from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image
 # ——
 def save_spectrogram_image(spectrogram, filename):
     """Save a spectrogram as an image."""
@@ -34,6 +48,8 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
 def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"
@@ -129,6 +145,8 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
 def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"

 import gradio as gr
 import torch, os
+import wave
 import numpy as np
+from scipy.io.wavfile import write
 from PIL import Image
 import matplotlib.pyplot as plt
 from huggingface_hub import snapshot_download
 from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image
 # ——
def convert_wav_to_16khz(input_path, output_path):
    """Resample a PCM WAV file to 16 kHz, mono, 16-bit and save it.

    The input is decoded according to its real sample width and channel
    count, downmixed to a single channel, resampled from its original
    frame rate to 16 kHz, and written to ``output_path``.

    Args:
        input_path: Path of the source WAV file (uncompressed PCM).
        output_path: Path where the 16 kHz WAV file is written.

    Returns:
        ``output_path``, so the call can be used inline.

    Raises:
        ValueError: If the sample width is not 8, 16 or 32 bits.
        wave.Error: If the input is not a WAV file ``wave`` can read.
    """
    # Function-scope import so this block does not depend on the
    # file-level import list being extended.
    from scipy.signal import resample_poly

    target_rate = 16000

    with wave.open(input_path, "rb") as wav_in:
        channels = wav_in.getnchannels()
        sampwidth = wav_in.getsampwidth()
        framerate = wav_in.getframerate()
        raw = wav_in.readframes(wav_in.getnframes())

    # Decode to float64 on a common 16-bit scale, honouring the real
    # sample width instead of assuming int16.
    if sampwidth == 1:
        # 8-bit WAV PCM is unsigned with a 128 offset.
        samples = np.frombuffer(raw, dtype=np.uint8).astype(np.float64)
        samples = (samples - 128.0) * 256.0
    elif sampwidth == 2:
        samples = np.frombuffer(raw, dtype="<i2").astype(np.float64)
    elif sampwidth == 4:
        samples = np.frombuffer(raw, dtype="<i4").astype(np.float64) / 65536.0
    else:
        raise ValueError(
            f"Unsupported sample width: {sampwidth} bytes "
            "(expected 1, 2 or 4)"
        )

    # Frames are interleaved per channel; average them so the output is
    # a single mono stream rather than interleaved data read as mono.
    if channels > 1:
        samples = samples.reshape(-1, channels).mean(axis=1)

    # Actually change the sample rate. Merely rewriting the header with
    # 16000 Hz would alter playback speed and pitch.
    if framerate != target_rate and samples.size:
        samples = resample_poly(samples, target_rate, framerate)

    # Resampling can overshoot slightly; clip before casting to int16.
    audio_data = np.clip(np.rint(samples), -32768, 32767).astype(np.int16)

    write(output_path, target_rate, audio_data)
    return output_path
 def save_spectrogram_image(spectrogram, filename):
     """Save a spectrogram as an image."""
 def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
+    audio_path = convert_wav_to_16khz(audio_path, "output_16khz.wav")
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"
 def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
+    audio_path = convert_wav_to_16khz(audio_path, "output_16khz.wav")
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"