Spaces:
Sleeping
Sleeping
File size: 1,566 Bytes
79f7f06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import os.path
from io import BytesIO
from pathlib import Path
import numpy as np
import torch
from network.hubert.hubert_model import hubert_soft, get_units
from network.hubert.vec_model import load_model, get_vec_units
from utils.hparams import hparams
class Hubertencoder:
    """Load a speech-unit extractor once and turn audio into unit features.

    Depending on ``hparams['use_vec']`` the instance holds either the model
    returned by ``load_model`` (the "vec" checkpoint) or the one returned by
    ``hubert_soft``. ``encode`` then runs the matching ``get_vec_units`` /
    ``get_units`` helper, or short-circuits to a cached ``.npy`` file.

    Attributes set by ``__init__``:
        use_vec: which extractor was loaded (snapshot of ``hparams['use_vec']``).
        use_gpu: whether GPU execution was requested.
        dev: the ``torch.device`` handed to the unit-extraction helpers.
        hbt_model: the loaded model object.
    """

    def __init__(self, pt_path='.checkpoints/hubert/hubert_soft.pt'):
        """Load the extractor selected by ``hparams``.

        Args:
            pt_path: soft-HuBERT checkpoint to load. If that exact file does
                not exist, the first ``*.pt`` found recursively under its
                parent directory is used instead. Ignored when
                ``hparams['use_vec']`` is truthy (a fixed vec checkpoint path
                is used in that mode).

        Raises:
            FileNotFoundError: in HuBERT mode, when neither ``pt_path`` nor
                any ``*.pt`` file under its parent directory exists.
        """
        # Publish the default so any other reader of ``hparams`` sees the key.
        if 'use_vec' not in hparams.keys():
            hparams['use_vec'] = False
        # Snapshot the choice: ``encode`` must use the extractor that matches
        # the model loaded here, even if ``hparams`` is modified afterwards.
        self.use_vec = bool(hparams['use_vec'])
        cuda_available = torch.cuda.is_available()

        if self.use_vec:
            pt_path = ".checkpoints/vec/checkpoint_best_legacy_500.pt"
            # The vec path always asks for the GPU ('hubert_gpu' is not
            # consulted here); only fall back to CPU when CUDA is absent so
            # the device passed to get_vec_units is at least usable.
            # NOTE(review): load_model chooses the model's own placement;
            # confirm it also handles CPU-only hosts.
            self.use_gpu = True
            self.dev = torch.device("cuda" if cuda_available else "cpu")
            self.hbt_model = load_model(pt_path)
        else:
            checkpoint = self._resolve_checkpoint(pt_path)
            # GPU is the default; 'hubert_gpu' lets the config opt out.
            if 'hubert_gpu' in hparams.keys():
                self.use_gpu = hparams['hubert_gpu']
            else:
                self.use_gpu = True
            self.dev = torch.device(
                "cuda" if self.use_gpu and cuda_available else "cpu"
            )
            self.hbt_model = hubert_soft(str(checkpoint)).to(self.dev)

    @staticmethod
    def _resolve_checkpoint(pt_path):
        """Return ``pt_path`` if it is a file, else the first ``*.pt`` near it."""
        requested = Path(pt_path)
        if requested.is_file():
            # An explicitly named checkpoint wins over whatever else happens
            # to live in the same directory tree.
            return requested
        fallback = next(iter(requested.parent.rglob('*.pt')), None)
        if fallback is None:
            raise FileNotFoundError(
                f"No '*.pt' checkpoint found at {requested} "
                f"or anywhere under {requested.parent}"
            )
        return fallback

    def encode(self, wav_path):
        """Return unit features for ``wav_path`` as a NumPy array.

        Args:
            wav_path: a filesystem path (``str`` or path-like) or an
                in-memory ``BytesIO``. A ``BytesIO`` is rewound to its start
                before being handed to the extractor.

        Returns:
            For a filesystem path with a sibling ``.npy`` file (same name,
            ``.npy`` suffix): the contents of that file, without running the
            model. Otherwise: element ``[0]`` of the extractor output after
            ``.cpu().numpy()``. The original author annotated the result as
            ``[T, 256]`` -- presumably for the soft-HuBERT model; TODO confirm
            for the vec model.
        """
        if isinstance(wav_path, BytesIO):
            # In-memory audio has no on-disk cache; make sure the reader
            # starts from the first byte.
            wav_path.seek(0)
            cache_file = None
        else:
            cache_file = Path(wav_path).with_suffix('.npy')

        if cache_file is not None and os.path.exists(cache_file):
            return np.load(str(cache_file))

        # Prefer the mode recorded at construction time; fall back to the
        # global config for instances created without running __init__.
        use_vec = getattr(self, 'use_vec', None)
        if use_vec is None:
            use_vec = hparams['use_vec']

        extract = get_vec_units if use_vec else get_units
        # [0] drops the leading axis of the extractor output
        # (presumably a batch dimension of size 1 -- TODO confirm).
        units = extract(self.hbt_model, wav_path, self.dev).cpu().numpy()[0]
        return units  # [T,256]
|