Audio segmentation powered by speaker diarization
Clone the repository and install its dependencies:

git clone https://github.com/nguyenvulebinh/audio-seg-diarization.git
cd audio-seg-diarization && pip install -r requirements.txt
Then load the pretrained model and segment an audio file:

from src.pyanet.pyanet_model import PyanNet
from src.utils import segmentor
import torch
import torchaudio

# Load the pretrained segmentation model and put it in eval mode
segmentation_model = PyanNet.from_pretrained("nguyenvulebinh/audio-seg-diarization").eval()
if torch.cuda.is_available():
    segmentation_model = segmentation_model.cuda()

# Load the waveform (shape: [channels, samples]) and its sample rate
wav_path = "./resource/example.wav"
wav, rate = torchaudio.load(wav_path)

# Split the audio into chunks of at most max_duration seconds
segments = segmentor(segmentation_model, wav, max_duration=25)
# Example output: each chunk has start/end times in seconds plus the
# diarization segments it was built from, e.g.:
# [{'start': 9568.527218750001, 'end': 9572.66159375, 'segments': [(9568.527218750001, 9572.66159375)]}]

# Slice the original waveform into one tensor per chunk
segments_wavs = [wav[0, int(seg['start'] * rate):int(seg['end'] * rate)] for seg in segments]
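Each entry in segments_wavs is a 1-D tensor. If you want to keep the chunks around for downstream processing (for example, transcription), one option is to write each one out as its own WAV file. A minimal sketch continuing from the snippet above; the output directory and file-naming scheme here are only illustrative, not part of the repository:

import os
import torchaudio

out_dir = "./segments"  # hypothetical output directory
os.makedirs(out_dir, exist_ok=True)

for i, seg_wav in enumerate(segments_wavs):
    # torchaudio.save expects a (channels, samples) tensor, so add a channel dimension
    out_path = os.path.join(out_dir, f"segment_{i:03d}.wav")
    torchaudio.save(out_path, seg_wav.unsqueeze(0), rate)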