Spaces:
Sleeping
Sleeping
update
Browse files
main.py
CHANGED
@@ -22,6 +22,7 @@ import shutil
|
|
22 |
import tempfile
|
23 |
import time
|
24 |
from typing import Dict, Tuple
|
|
|
25 |
import zipfile
|
26 |
|
27 |
import gradio as gr
|
@@ -30,11 +31,11 @@ import librosa
|
|
30 |
import librosa.display
|
31 |
import matplotlib.pyplot as plt
|
32 |
import numpy as np
|
|
|
33 |
|
34 |
import log
|
35 |
from project_settings import environment, project_path, log_directory
|
36 |
from toolbox.os.command import Command
|
37 |
-
from toolbox.torchaudio.models.dfnet.inference_dfnet import InferenceDfNet
|
38 |
from toolbox.torchaudio.models.dfnet2.inference_dfnet2 import InferenceDfNet2
|
39 |
from toolbox.torchaudio.models.dtln.inference_dtln import InferenceDTLN
|
40 |
from toolbox.torchaudio.models.frcrn.inference_frcrn import InferenceFRCRN
|
@@ -79,6 +80,28 @@ def get_args():
|
|
79 |
return args
|
80 |
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
def shell(cmd: str):
    """
    Run a command string through the project's Command helper.

    :param cmd: str, the command line to execute.
    :return: whatever Command.popen returns for this command.
    """
    result = Command.popen(cmd)
    return result
|
84 |
|
@@ -131,6 +154,10 @@ def when_click_denoise_button(noisy_audio_file_t = None, noisy_audio_microphone_
|
|
131 |
noisy_audio_t: Tuple = noisy_audio_file_t or noisy_audio_microphone_t
|
132 |
|
133 |
sample_rate, signal = noisy_audio_t
|
|
|
|
|
|
|
|
|
134 |
audio_duration = signal.shape[-1] // 8000
|
135 |
|
136 |
# Test: when using the microphone, the reported sample rate is 44100, but the signal is actually sampled at 8000.
|
|
|
22 |
import tempfile
|
23 |
import time
|
24 |
from typing import Dict, Tuple
|
25 |
+
import uuid
|
26 |
import zipfile
|
27 |
|
28 |
import gradio as gr
|
|
|
31 |
import librosa.display
|
32 |
import matplotlib.pyplot as plt
|
33 |
import numpy as np
|
34 |
+
from scipy.io import wavfile
|
35 |
|
36 |
import log
|
37 |
from project_settings import environment, project_path, log_directory
|
38 |
from toolbox.os.command import Command
|
|
|
39 |
from toolbox.torchaudio.models.dfnet2.inference_dfnet2 import InferenceDfNet2
|
40 |
from toolbox.torchaudio.models.dtln.inference_dtln import InferenceDTLN
|
41 |
from toolbox.torchaudio.models.frcrn.inference_frcrn import InferenceFRCRN
|
|
|
80 |
return args
|
81 |
|
82 |
|
83 |
+
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """
    Write an int16 signal to a uniquely named wav file and return its path.

    The file is created in an "input_audio" folder under the system temp
    directory; the folder is created on demand.

    :param sample_rate: int, sample rate written into the wav header.
    :param signal: np.ndarray, samples to store; must have dtype np.int16.
    :return: str, POSIX-style path of the written wav file.
    :raises AssertionError: if signal.dtype is not np.int16.
    """
    if signal.dtype != np.int16:
        raise AssertionError(f"only support dtype np.int16, however: {signal.dtype}")

    output_dir = Path(tempfile.gettempdir()) / "input_audio"
    output_dir.mkdir(parents=True, exist_ok=True)

    # A random UUID keeps separate calls from overwriting each other's files.
    wav_path = (output_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(wav_path, sample_rate, signal)
    return wav_path
|
95 |
+
|
96 |
+
|
97 |
+
def convert_sample_rate(signal: np.ndarray, sample_rate: int, target_sample_rate: int):
    """
    Resample an int16 signal to target_sample_rate and return it as int16.

    The signal is first written to a temporary wav file via save_input_audio,
    then read back with librosa.load, which resamples to the requested rate.

    :param signal: np.ndarray, input samples; save_input_audio rejects any
        dtype other than np.int16.
    :param sample_rate: int, sample rate of the input signal.
    :param target_sample_rate: int, sample rate of the returned signal.
    :return: np.ndarray of dtype np.int16 at target_sample_rate.
    :raises AssertionError: propagated from save_input_audio when the input
        dtype is not np.int16.
    """
    filename = save_input_audio(sample_rate, signal)

    # NOTE(review): librosa.load is called with its defaults, so multi-channel
    # input is presumably mixed down to mono here - confirm this is intended.
    signal, _ = librosa.load(filename, sr=target_sample_rate)

    # librosa returns floating point samples; resampling can overshoot so that
    # the scaled value falls outside the int16 range. Casting such a value to
    # int16 wraps around (loud clicks), so clip to the valid range first.
    int16_info = np.iinfo(np.int16)
    scaled = np.clip(signal * (1 << 15), int16_info.min, int16_info.max)
    return scaled.astype(np.int16)
|
103 |
+
|
104 |
+
|
105 |
def shell(cmd: str):
    """
    Run a command string through the project's Command helper.

    :param cmd: str, the command line to execute.
    :return: whatever Command.popen returns for this command.
    """
    result = Command.popen(cmd)
    return result
|
107 |
|
|
|
154 |
noisy_audio_t: Tuple = noisy_audio_file_t or noisy_audio_microphone_t
|
155 |
|
156 |
sample_rate, signal = noisy_audio_t
|
157 |
+
if sample_rate != 8000:
|
158 |
+
signal = convert_sample_rate(signal, sample_rate, 8000)
|
159 |
+
sample_rate = 8000
|
160 |
+
|
161 |
audio_duration = signal.shape[-1] // 8000
|
162 |
|
163 |
# Test: when using the microphone, the reported sample rate is 44100, but the signal is actually sampled at 8000.
|