|
|
|
|
|
import shutil |
|
import os |
|
from gradio_client import Client |
|
|
|
|
|
# Module-level configuration: where moved audio files are kept and which
# Gradio server the client talks to.
output_dir = "temp_audio"
api_url = "http://127.0.0.1:7860/"

# Create the output directory up front; exist_ok avoids an error when it is
# already there from a previous run.
os.makedirs(output_dir, exist_ok=True)

# Shared client instance used by text_to_speech(). It is constructed when the
# module is loaded.
client = Client(api_url)
|
|
|
def text_to_speech(
    text="Hello!!",
    model_name="kokoro-v0_19.pth",
    voice_name="af_bella",
    speed=1,
    trim=0,
    pad_between_segments=0,
    remove_silence=False,
    minimum_silence=0.05,
):
    """
    Request speech for ``text`` from the TTS endpoint and keep the audio file.

    Every argument is forwarded by keyword, unchanged, to the
    "/text_to_speech" endpoint through the module-level ``client``. The file
    handed back by the endpoint is then moved into ``output_dir`` under its
    original base name, and the new location is printed.

    Parameters:
        text (str): The text to convert to speech.
        model_name (str): The name of the model to use for synthesis.
        voice_name (str): The name of the voice to use.
        speed (float): The speed of speech.
        trim (int): Silence-trimming setting for the beginning and end.
        pad_between_segments (int): Padding between audio segments.
        remove_silence (bool): Whether to remove silence from the audio.
        minimum_silence (float): Minimum silence duration to consider.

    Returns:
        str: Path of the moved audio file, built as
        "<output_dir>/<file name>".
    """
    request = {
        "text": text,
        "model_name": model_name,
        "voice_name": voice_name,
        "speed": speed,
        "trim": trim,
        "pad_between_segments": pad_between_segments,
        "remove_silence": remove_silence,
        "minimum_silence": minimum_silence,
    }

    # NOTE(review): the value returned by predict is treated as a local file
    # path by the basename/move calls below -- confirm against the endpoint.
    generated_file = client.predict(**request, api_name="/text_to_speech")

    # Keep the generated file's own name, but relocate it into output_dir.
    file_name = os.path.basename(generated_file)
    destination = f"{output_dir}/{file_name}"
    shutil.move(generated_file, destination)
    print(f"Saved at {destination}")

    return destination
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: synthesize one sentence and report where the audio file
    # ended up. The mapping keys are the keyword parameters of
    # text_to_speech().
    demo_settings = {
        "text": "This is Kokoro TTS. I am a text-to-speech model and Super Fast.",
        "model_name": "kokoro-v0_19.pth",
        "voice_name": "af_bella",
        "speed": 1,
        "trim": 0,
        "pad_between_segments": 0,
        "remove_silence": False,
        "minimum_silence": 0.05,
    }

    audio_path = text_to_speech(**demo_settings)
    print(f"Audio file saved at: {audio_path}")
|
|