fffiloni committed on
Commit
9710eda
·
verified ·
1 Parent(s): 3fe4f3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import gradio as gr
2
  import torch, os
 
3
  import numpy as np
 
4
  from PIL import Image
5
  import matplotlib.pyplot as plt
6
  from huggingface_hub import snapshot_download
@@ -14,6 +16,18 @@ from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormal
14
  from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image
15
 
16
  # ——
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def save_spectrogram_image(spectrogram, filename):
19
  """Save a spectrogram as an image."""
@@ -34,6 +48,8 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
34
 
35
  def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
36
 
 
 
37
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
38
  dtype = torch.float16
39
  device = "cuda"
@@ -129,6 +145,8 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
129
 
130
  def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
131
 
 
 
132
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
133
  dtype = torch.float16
134
  device = "cuda"
 
1
  import gradio as gr
2
  import torch, os
3
+ import wave
4
  import numpy as np
5
+ from scipy.io.wavfile import write
6
  from PIL import Image
7
  import matplotlib.pyplot as plt
8
  from huggingface_hub import snapshot_download
 
16
  from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image
17
 
18
  # ——
19
def convert_wav_to_16khz(input_path, output_path):
    """Resample a PCM WAV file to 16 kHz mono 16-bit and save it.

    The previous implementation only rewrote the header with a 16 kHz rate,
    which changed the speed and pitch of any input that was not already
    16 kHz, and it read every file as int16 regardless of its real sample
    width or channel count. This version actually resamples the signal.

    Args:
        input_path: Path of the source WAV file (integer PCM, as supported
            by the standard ``wave`` module).
        output_path: Path where the converted WAV file is written.

    Returns:
        ``output_path``, so the call can be used inline.

    Raises:
        ValueError: If the sample width is not 8, 16 or 32 bits.
        wave.Error: If the input is not a PCM WAV file readable by ``wave``.
    """
    # Imported locally so the module-level import block stays untouched;
    # scipy is already a dependency of this file (scipy.io.wavfile).
    from scipy.signal import resample_poly

    target_rate = 16000
    # PCM sample width in bytes -> numpy dtype (8-bit WAV data is unsigned).
    pcm_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}

    with wave.open(input_path, "rb") as wav_in:
        channels = wav_in.getnchannels()
        sampwidth = wav_in.getsampwidth()
        framerate = wav_in.getframerate()
        raw = wav_in.readframes(wav_in.getnframes())

    if sampwidth not in pcm_dtypes:
        raise ValueError(
            f"Unsupported sample width: {sampwidth} bytes "
            "(expected 1, 2 or 4)"
        )

    # Drop a trailing partial frame (truncated file) so decoding and the
    # per-channel reshape below cannot fail on a ragged buffer.
    frame_size = sampwidth * channels
    raw = raw[: len(raw) - len(raw) % frame_size]

    # Work in float64, scaled to the 16-bit range so that 16-bit input
    # which needs no resampling round-trips bit-exactly.
    samples = np.frombuffer(raw, dtype=pcm_dtypes[sampwidth]).astype(np.float64)
    if sampwidth == 1:
        samples = (samples - 128.0) * 256.0
    elif sampwidth == 4:
        samples = samples / 65536.0

    # Frames are interleaved per channel; average them into a mono signal.
    # The output stays a 1-D int16 array, as the original code wrote.
    if channels > 1:
        samples = samples.reshape(-1, channels).mean(axis=1)

    if framerate != target_rate and samples.size:
        # resample_poly reduces up/down by their gcd and applies an
        # anti-aliasing filter, so arbitrary integer rates are fine.
        samples = resample_poly(samples, target_rate, framerate)

    # The filter can overshoot slightly; clip before casting to avoid
    # integer wrap-around.
    pcm16 = np.clip(np.round(samples), -32768, 32767).astype(np.int16)

    write(output_path, target_rate, pcm16)
    return output_path
31
 
32
  def save_spectrogram_image(spectrogram, filename):
33
  """Save a spectrogram as an image."""
 
48
 
49
  def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
50
 
51
+ audio_path = convert_wav_to_16khz(audio_path, "output_16khz.wav")
52
+
53
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
54
  dtype = torch.float16
55
  device = "cuda"
 
145
 
146
  def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
147
 
148
+ audio_path = convert_wav_to_16khz(audio_path, "output_16khz.wav")
149
+
150
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
151
  dtype = torch.float16
152
  device = "cuda"