import IPython
from huggingface_hub.inference_api import InferenceApi
import torch
from TTS.api import TTS
import wave
from espeakng import ESpeakNG
import subprocess
from scipy.io import wavfile
from transformers import pipeline
import os
def synth_mms(text: str, model: str):
    '''
    Use the Hugging Face inference pipeline to synthesize text.
    (Can be replaced by the Inference API, but that requires a stored API token.)

    Inputs:
        text:  Text to synthesize
        model: Model code of the form mms-tts-LAN
    Returns:
        Audio as a NumPy array and its sampling rate.
    '''
    #inference = InferenceApi(repo_id=f"facebook/{model}",
    #                         token=API_TOKEN)
    #mms_tts = inference(inputs=text,
    #                    raw_response=True)._content

    if model is not None:
        pipe = pipeline("text-to-speech", model=model, device=-1)  # device=-1 runs on CPU; set a GPU index to use CUDA
        mms_tts = pipe(text)
        return mms_tts['audio'], mms_tts['sampling_rate']
    else:
        return None
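
# A minimal usage sketch (not part of the original file): MMS checkpoints on the Hub are
# published as "facebook/mms-tts-<lang>", and the pipeline needs the full repo id, so the
# id below is an assumed example. The pipeline may return the waveform with a leading
# batch dimension, hence the squeeze before writing.
#
#   audio, rate = synth_mms("Hello, world!", "facebook/mms-tts-eng")
#   wavfile.write("mms_example.wav", rate, audio.squeeze())
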
def synth_coqui(text: str, model: str):
    '''
    Use the Coqui TTS API to synthesize text.

    Inputs:
        text:  Text to synthesize
        model: Model code
    Returns:
        Audio as a NumPy array and its sampling rate.
    '''
    if model is not None:
        # Get device
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Init TTS
        tts = TTS(model, progress_bar=False).to(device)

        # Synthesize to a temporary file, read it back, then clean up
        tts.tts_to_file(text=text, file_path="test.wav", is_multi_speaker=False)
        sampling_rate, wav = wavfile.read('test.wav')
        os.remove("test.wav")
        #wav = tts.tts(text=text)
        return wav, sampling_rate
    else:
        return None
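
# A minimal usage sketch (not part of the original file): Coqui model codes follow the
# "tts_models/<lang>/<dataset>/<architecture>" naming scheme; the LJSpeech model below is
# an assumed example.
#
#   wav, rate = synth_coqui("Hello, world!", "tts_models/en/ljspeech/tacotron2-DDC")
#   wavfile.write("coqui_example.wav", rate, wav)
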
def synth_espeakng(text: str, model: str):
    '''
    Use eSpeak NG to synthesize text.

    Inputs:
        text:  Text to synthesize
        model: Voice/language code
    Returns:
        Audio as a NumPy array and its sampling rate.
    '''
    if model is not None:
        # Write the synthesized audio to a temporary file, read it back, then clean up.
        # The -w flag and its filename must be passed as separate arguments so espeak-ng
        # writes to "test.wav" rather than a file with a leading space in its name.
        subprocess.run(['espeak-ng', f'-v{model}', '-w', 'test.wav', text], check=True)
        sampling_rate, wav = wavfile.read('test.wav')
        os.remove("test.wav")
        return wav, sampling_rate
    else:
        return None
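
# A minimal usage sketch (not part of the original file): eSpeak NG voices are selected
# by language code, e.g. "en" for English. The guard keeps the example from running when
# this module is imported by the app.
if __name__ == "__main__":
    wav, rate = synth_espeakng("Hello, world!", "en")
    wavfile.write("espeakng_example.wav", rate, wav)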