"""Gradio front end for kNN-VC voice conversion.

Exposes three tabs:

* "Voice Conversion" -- convert a source recording using reference recordings.
* "Model Creation" -- precompute a matching set from reference recordings and
  save it to disk.
* "Voice Conversion By Model" -- convert a source recording using a previously
  saved matching set.
"""

import os

import gradio as gr
from pydub import AudioSegment
from torch import device as Device
from torch import load as torch_load
from torch import save as torch_save
from torch.cuda import is_available as cuda_is_available
from torch.hub import load as torch_hub_load
from torchaudio import save as torchaudio_save

# Sample rate every input is resampled to and the output is written at.
_SAMPLE_RATE = 16000
# Fixed output locations (shared by all requests).
_OUTPUT_AUDIO_PATH = 'output.wav'
_OUTPUT_MODEL_PATH = 'model.pt'

# Device the model runs on; also used to remap saved matching sets on load.
_DEVICE = Device("cuda" if cuda_is_available() else "cpu")

knn_vc = torch_hub_load(
    'bshall/knn-vc',
    'knn_vc',
    prematched=True,
    trust_repo=True,
    pretrained=True,
    device=_DEVICE,
)


def _as_path(file):
    """Return a filesystem path for *file*.

    Strings and path-like objects are returned as a plain path string. Any
    other object is assumed to be a file-like wrapper (such as the one a
    Gradio ``File`` component with ``type="file"`` is documented to provide)
    whose ``name`` attribute holds the path; objects without ``name`` are
    returned unchanged.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return getattr(file, "name", file)


def process_audio(path):
    """Rewrite the audio file at *path* in place as 16 kHz mono WAV.

    Args:
        path: Path (or file wrapper exposing ``.name``) of the audio file.
            The file is overwritten; its name/extension is left unchanged.
    """
    path = _as_path(path)
    audio = AudioSegment.from_file(path)
    audio = audio.set_frame_rate(_SAMPLE_RATE).set_channels(1)
    # export() hands back the output file handle; close it explicitly so the
    # data is flushed before the file is read again.
    audio.export(path, format="wav").close()


def _convert_and_save(query_features, matching_set, topk):
    """Run kNN matching and write the result to the fixed output WAV path."""
    # A Gradio Number input may deliver a float; the neighbour count must be
    # an integer.
    converted = knn_vc.match(query_features, matching_set, topk=int(topk))
    # [None] adds a leading axis before saving (presumably the channel axis
    # torchaudio expects -- confirm against knn_vc.match's output shape).
    torchaudio_save(_OUTPUT_AUDIO_PATH, converted[None], _SAMPLE_RATE)
    return _OUTPUT_AUDIO_PATH


def voice_conversion(src_path, ref_paths, src_vad_trigger_level,
                     ref_vad_trigger_level, topk):
    """Convert the source recording using a set of reference recordings.

    All input files are first rewritten in place as 16 kHz mono WAV.

    Args:
        src_path: Path of the source audio.
        ref_paths: List of reference audio paths (or file wrappers).
        src_vad_trigger_level: Passed as ``vad_trigger_level`` when
            extracting source features.
        ref_vad_trigger_level: Passed as ``vad_trigger_level`` when building
            the matching set.
        topk: Number of neighbours passed to ``knn_vc.match``.

    Returns:
        The path of the written audio file, ``'output.wav'``.
    """
    src_path = _as_path(src_path)
    ref_paths = [_as_path(ref) for ref in ref_paths]

    for path in [*ref_paths, src_path]:
        process_audio(path)

    query_features = knn_vc.get_features(
        src_path, vad_trigger_level=src_vad_trigger_level)
    matching_set = knn_vc.get_matching_set(
        ref_paths, vad_trigger_level=ref_vad_trigger_level)
    return _convert_and_save(query_features, matching_set, topk)


def voices2model(ref_paths, ref_vad_trigger_level):
    """Build a matching set from reference recordings and save it to disk.

    The reference files are first rewritten in place as 16 kHz mono WAV.

    Args:
        ref_paths: List of reference audio paths (or file wrappers).
        ref_vad_trigger_level: Passed as ``vad_trigger_level`` when building
            the matching set.

    Returns:
        The path of the saved matching set, ``'model.pt'``.
    """
    ref_paths = [_as_path(ref) for ref in ref_paths]

    for path in ref_paths:
        process_audio(path)

    matching_set = knn_vc.get_matching_set(
        ref_paths, vad_trigger_level=ref_vad_trigger_level)
    torch_save(matching_set, _OUTPUT_MODEL_PATH)
    return _OUTPUT_MODEL_PATH


def model2voice_conv(src_path, model_path, src_vad_trigger_level, topk):
    """Convert the source recording using a previously saved matching set.

    The source file is first rewritten in place as 16 kHz mono WAV.

    Args:
        src_path: Path of the source audio.
        model_path: Path (or file wrapper) of a matching set written by
            ``voices2model``.
        src_vad_trigger_level: Passed as ``vad_trigger_level`` when
            extracting source features.
        topk: Number of neighbours passed to ``knn_vc.match``.

    Returns:
        The path of the written audio file, ``'output.wav'``.
    """
    src_path = _as_path(src_path)
    process_audio(src_path)

    query_features = knn_vc.get_features(
        src_path, vad_trigger_level=src_vad_trigger_level)
    # SECURITY NOTE: torch.load unpickles the file, and this file is uploaded
    # by the user. Only expose this tab to trusted users, or restrict loading
    # (e.g. weights-only loading where the installed torch supports it).
    # map_location lets a matching set saved on a GPU host load on a CPU host.
    matching_set = torch_load(_as_path(model_path), map_location=_DEVICE)
    return _convert_and_save(query_features, matching_set, topk)


conversion_tab = gr.Interface(
    voice_conversion,
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(file_count="multiple", file_types=["audio"]),
        gr.Number(7),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)

model_creation_tab = gr.Interface(
    voices2model,
    [
        gr.File(file_count="multiple", file_types=["audio"]),
        gr.Number(7),
    ],
    gr.File(type="file", label="Model"),
)

model_conversion_tab = gr.Interface(
    model2voice_conv,
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(type="file", label="Model"),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)

gr.TabbedInterface(
    [conversion_tab, model_creation_tab, model_conversion_tab],
    ["Voice Conversion", "Model Creation", "Voice Conversion By Model"],
).launch()