---
library_name: transformers
tags: []
---

# Audio segmentation powered by speaker diarization

```bash
git clone https://github.com/nguyenvulebinh/audio-seg-diarization.git
cd audio-seg-diarization && pip install -r requirements.txt
```

```python
import torch
import torchaudio

from src.pyanet.pyanet_model import PyanNet
from src.utils import segmentor

# Load the pretrained segmentation model and switch it to evaluation mode.
segmentation_model = PyanNet.from_pretrained("nguyenvulebinh/audio-seg-diarization").eval()

# Run on the GPU when one is available; otherwise the model stays on the CPU.
if torch.cuda.is_available():
    segmentation_model = segmentation_model.cuda()

# torchaudio.load returns the waveform tensor (channels, samples) and its sample rate.
wav_path = "./resource/example.wav"
wav, rate = torchaudio.load(wav_path)

# Split the recording into segments.
# NOTE(review): max_duration is presumably a cap in seconds on each segment - confirm in src.utils.
segments = segmentor(segmentation_model, wav, max_duration=25)

# Example result (one dict per segment; 'start' and 'end' are in seconds):
# [{'start': 9568.527218750001, 'end': 9572.66159375, 'segments': [(9568.527218750001, 9572.66159375)]}]

# Cut every segment out of the first channel, converting seconds to sample indices.
segments_wavs = [wav[0, int(seg['start'] * rate):int(seg['end'] * rate)] for seg in segments]
```