Sing-For-Me / vocal_isolation /short_time_fourier_transform.py
Jarod Castillo
init
bb70eb3
import torch
class STFT:
    """Short-time Fourier transform wrapper with real/imag stacked as channels.

    The forward call turns ``(..., channels, time)`` audio into
    ``(..., channels * 2, dim_f, frames)``, where each input channel
    contributes two consecutive output channels (real part, then imaginary
    part) and only the lowest ``dim_f`` frequency bins are kept.
    ``inverse`` undoes this, zero-filling the discarded bins.
    """

    def __init__(self, n_fft: int, hop_length: int, dim_f: int) -> None:
        """Store the transform parameters and build the analysis window.

        Args:
            n_fft: FFT size; also used as the Hann window length.
            hop_length: Hop between successive frames, in samples.
            dim_f: Number of low-frequency bins kept by the forward call.
        """
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.window = torch.hann_window(window_length=n_fft, periodic=True)
        self.dim_f = dim_f

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the spectrogram of ``x``.

        Args:
            x: Tensor of shape ``(..., channels, time)``.

        Returns:
            Tensor of shape ``(..., channels * 2, min(dim_f, n_fft // 2 + 1),
            frames)`` holding real and imaginary parts as separate channels.
        """
        window = self.window.to(x.device)
        batch_dims = x.shape[:-2]
        c, t = x.shape[-2:]

        # torch.stft only accepts 1-D/2-D input, so fold every leading
        # dimension (batch and channels) into a single one.
        x = x.reshape([-1, t])
        x = torch.stft(
            x,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            window=window,
            center=True,
            return_complex=True,
        )
        # (N, freq, frames) complex -> (N, freq, frames, 2) real
        x = torch.view_as_real(x)
        # Bring the real/imag axis next to the folded channel axis.
        x = x.permute([0, 3, 1, 2])
        frames = x.shape[-1]
        # Unfold the leading dimensions and merge real/imag into channels.
        x = x.reshape([*batch_dims, c * 2, -1, frames])
        return x[..., : self.dim_f, :]

    def inverse(self, x: torch.Tensor) -> torch.Tensor:
        """Reconstruct the waveform from a spectrogram made by ``__call__``.

        Args:
            x: Tensor of shape ``(..., channels * 2, freq, frames)`` with
                ``freq <= n_fft // 2 + 1``.

        Returns:
            Tensor of shape ``(..., channels, time)``.
        """
        window = self.window.to(x.device)
        batch_dims = x.shape[:-3]
        c, f, t = x.shape[-3:]
        n = self.n_fft // 2 + 1
        channels = c // 2

        # Restore the frequency bins dropped by the forward crop with zeros,
        # allocated directly on the target device.
        f_pad = torch.zeros([*batch_dims, c, n - f, t], device=x.device)
        x = torch.cat([x, f_pad], -2)

        # Split the stacked real/imag channels apart and fold every leading
        # dimension into one so istft sees (N, freq, frames).
        x = x.reshape([-1, 2, n, t])
        x = x.permute([0, 2, 3, 1]).contiguous()
        t_complex = torch.view_as_complex(x)
        x = torch.istft(
            t_complex,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            window=window,
            center=True,
        )
        # Use the real channel count rather than assuming stereo, so mono
        # and multi-channel inputs round-trip to their original shape.
        return x.reshape([*batch_dims, channels, -1])