# Hugging Face Space status: Running on Zero
import spaces | |
import tempfile | |
import gradio as gr | |
from f5_tts.api import F5TTS | |
from huggingface_hub import hf_hub_download | |
import os | |
# Checkpoint variants offered in the demo. Each name is the stem of a
# "<variant>.pt" file in the model repository below.
VARIANTS = ["model_800000", "model_300000", "model_100000"]

# Repository holding the checkpoints and the shared vocabulary file.
_REPO_ID = "mrfakename/openf5-v2"

# Read the access token once instead of on every download call.
_HF_TOKEN = os.getenv("HF_TOKEN")

# Every variant is built with the same vocab.txt, so resolve it a single time
# rather than once per checkpoint.
_VOCAB_FILE = hf_hub_download(_REPO_ID, "vocab.txt", token=_HF_TOKEN)

# Maps variant name -> ready-to-use F5TTS instance. All variants are loaded
# eagerly at import time so that generate() can look them up by name.
models = {
    variant: F5TTS(
        ckpt_file=hf_hub_download(_REPO_ID, f"{variant}.pt", token=_HF_TOKEN),
        vocab_file=_VOCAB_FILE,
    )
    for variant in VARIANTS
}
# On ZeroGPU hardware a GPU is only attached while a function decorated with
# spaces.GPU is running; the decorator is documented as having no effect in
# other environments.
@spaces.GPU
def generate(text, ref_audio, variant, progress=gr.Progress()):
    """Synthesize ``text`` with the selected checkpoint and return a WAV path.

    Args:
        text: Text to synthesize.
        ref_audio: File path of the reference audio clip.
        variant: Key into ``models`` selecting which checkpoint to use.
        progress: Gradio progress tracker, forwarded to ``F5TTS.infer``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is intentionally not deleted here because the caller (the Gradio
        output component) reads it after this function returns.

    Raises:
        gr.Error: If ``text`` is empty/blank or no reference audio was given.
        KeyError: If ``variant`` is not a key of ``models``.
    """
    # Fail early with a message the UI can show instead of letting the model
    # code crash on missing inputs.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not ref_audio:
        raise gr.Error("Please provide a reference audio clip.")

    api = models[variant]

    # Only reserve a unique file name here and close the handle right away:
    # infer() writes to the path itself, and writing to a file that is still
    # open under another handle is not portable (it fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name

    try:
        api.infer(
            ref_file=ref_audio,
            # No reference transcript is supplied.
            # NOTE(review): F5-TTS presumably transcribes the reference audio
            # itself when this is empty - confirm against the F5TTS API.
            ref_text="",
            gen_text=text,
            progress=progress,
            file_wave=out_path,
        )
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        try:
            os.remove(out_path)
        except OSError:
            pass  # Best-effort cleanup; the original error matters more.
        raise

    print(out_path)
    return out_path
# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = (
    "# OpenF5 TTS Demo\n\n"
    "Try out various checkpoints of the OpenF5-TTS model. "
    "The model is currently still being trained.\n\n"
    "The goal is to create a permissively-licensed F5-TTS model checkpoint "
    "trained on commercially-viable data that can be used as a base for "
    "further fine-tuning."
)

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Inputs: text to speak, reference clip, and the checkpoint to use.
    textbox = gr.Textbox(label="Text")
    audio = gr.Audio(label="Reference Audio", type="filepath")
    variant = gr.Radio(
        choices=VARIANTS,
        value=VARIANTS[0],
        label="Variant",
    )

    btn = gr.Button("Generate", variant="primary")

    # Output: the path returned by generate() is shown as an audio player.
    output = gr.Audio(label="Output", type="filepath")

    # Wire the button to the synthesis function.
    btn.click(
        fn=generate,
        inputs=[textbox, audio, variant],
        outputs=[output],
    )

# Enable request queuing, then start the app.
demo.queue()
demo.launch()