import functools

import torch
import IPython
from TTS.api import TTS

# Coqui model identifier used for zero-shot voice conversion.
_FREEVC24_MODEL_NAME = "voice_conversion_models/multilingual/vctk/freevc24"

# Identified sampling rate of freevc24
_FREEVC24_SAMPLE_RATE = 24000


@functools.lru_cache(maxsize=None)
def _load_freevc24(device: str):
    '''
    Build the freevc24 voice-conversion model on ``device`` and memoize it.

    The cache is keyed on the device string, so at most one model instance
    per distinct device is kept alive for the lifetime of the process.
    Call ``_load_freevc24.cache_clear()`` to release the cached model(s).
    '''
    return TTS(model_name=_FREEVC24_MODEL_NAME, progress_bar=False).to(device)


def convert_coqui(source_wav:str, target_wav:str):
    '''
    Use Coqui TTS for zero-shot voice conversion.

    Inputs:
        source_wav: Wav of the thing you want to say.
        target_wav: Wav of the speaker you want to hear.
    Returns:
        A ``(wav, sampling_rate)`` tuple. ``wav`` is whatever
        ``TTS.voice_conversion`` returns (presumably the converted
        waveform samples -- confirm against the installed TTS version);
        ``sampling_rate`` is always 24000, the rate identified for freevc24.
    '''
    # Prefer the GPU when one is visible to torch, otherwise run on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Reuse the already-loaded model instead of re-instantiating it on
    # every call; constructing the model is the expensive part.
    tts = _load_freevc24(device)

    wav = tts.voice_conversion(source_wav=source_wav, target_wav=target_wav)
    return wav, _FREEVC24_SAMPLE_RATE