import os
import tempfile
from typing import List

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import spaces
from audiosr import super_resolution
from audiosr_utils import load_audiosr

# Load the model once at startup so every request reuses the same weights
audiosr_model = load_audiosr()


def split_audio_to_chunks(y, sr=48000, chunk_duration=5.12) -> List[str]:
    # Calculate the number of samples per chunk
    chunk_samples = int(chunk_duration * sr)
    # Split the audio into fixed-length chunks; the last one may be shorter
    chunks = [y[i : i + chunk_samples] for i in range(0, len(y), chunk_samples)]
    # Write each chunk to its own temporary WAV file
    temp_files = []
    for chunk in chunks:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_files.append(temp_file.name)
        sf.write(temp_file.name, chunk, sr)
    return temp_files
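
# Example (a sketch of the chunking arithmetic): a 12 s clip at 48 kHz is
# 576,000 samples; with chunk_duration=5.12 (245,760 samples) it yields three
# files of 5.12 s, 5.12 s, and 1.76 s; the final chunk is simply shorter.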


# GPU-bound work on ZeroGPU Spaces must run inside a @spaces.GPU-decorated
# function; the decorator is a no-op on other hardware.
@spaces.GPU
def run_audiosr(
    chunks: List[str], guidance_scale: float, ddim_steps: int
) -> np.ndarray:
    waveforms = []
    for i, chunk in enumerate(chunks):
        print(f"Processing chunk {i + 1}/{len(chunks)}")
        waveform = super_resolution(
            audiosr_model,
            chunk,
            guidance_scale=guidance_scale,
            ddim_steps=ddim_steps,
        )
        waveforms.append(waveform)
    # Each chunk comes back as a (1, 1, n) array; join along the sample axis
    waveform = np.concatenate(waveforms, axis=-1)  # (1, 1, N)
    return waveform.squeeze()
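
# Shape note (following the (1, 1, N) comment above): concatenating on the last
# axis and squeezing yields a flat mono waveform, e.g.:
#   a, b = np.zeros((1, 1, 3)), np.zeros((1, 1, 2))
#   np.concatenate([a, b], axis=-1).squeeze().shape  # -> (5,)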


def audiosr_infer(audio: str) -> str:
    guidance_scale = 3.5
    ddim_steps = 100
    y, sr = librosa.load(audio, sr=48000)
    if len(y) > 60 * sr:
        y = y[: 60 * sr]
        gr.Info("Audio is too long; only the first 60 seconds will be processed")
    chunk_files = split_audio_to_chunks(y, sr=sr, chunk_duration=5.12)
    print(f"Split audio chunks: {chunk_files}")
    waveform = run_audiosr(chunk_files, guidance_scale, ddim_steps)
    # AudioSR outputs 48 kHz audio; write at that rate to preserve playback speed
    sr = 48000
    for chunk_file in chunk_files:
        os.remove(chunk_file)
    # soundfile can encode MP3 when built against libsndfile >= 1.1.0
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        sf.write(f.name, waveform, sr)
    return f.name


models = {
    "AudioSR": audiosr_infer,
}
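
# To expose another enhancer, register it here. Each entry maps a display name
# to a callable that takes an input audio path and returns an output file path,
# e.g. (hypothetical):
#   models["MyEnhancer"] = my_enhancer_infer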


def infer(audio: str, model: str, sr: int) -> str:
    if sr > 0:
        # Resample the audio to the requested rate
        y, _ = librosa.load(audio, sr=sr)
        # Save the resampled audio to a temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            sf.write(f.name, y, sr)
        return models[model](f.name)
    else:
        return models[model](audio)


with gr.Blocks() as app:
    with open(os.path.join(os.path.dirname(__file__), "README.md"), "r") as f:
        README = f.read()
    # Strip the YAML front matter delimited by the first two "---" markers
    blocks = README.split("---")
    if len(blocks) > 1:
        README = "---".join(blocks[2:])
    gr.Markdown(README)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Upload an audio file")
            audio = gr.Audio(label="Upload an audio file", type="filepath")
            sr = gr.Slider(
                value=0,
                label="Resample audio to this rate before inference (0 = no resampling)",
                minimum=0,
                maximum=48000,
                step=1000,
            )
    with gr.Row():
        model = gr.Radio(
            label="Select a model",
            choices=list(models.keys()),
            value="AudioSR",
        )
        btn = gr.Button("Infer")
    with gr.Row():
        with gr.Column():
            out = gr.Audio(
                label="Output", format="mp3", type="filepath", interactive=False
            )
    btn.click(
        fn=infer,
        inputs=[audio, model, sr],
        outputs=[out],
        api_name="infer",
    )

app.launch(show_error=True)
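
# Example client call against the named endpoint (a sketch; assumes
# gradio_client >= 1.0 and that the app is reachable at this URL):
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(handle_file("input.wav"), "AudioSR", 0, api_name="/infer")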