# OpenF5-TTS / app.py
# Author: mrfakename
# Commit: "Update app.py" (e7dc064, verified)
import spaces
import tempfile
import gradio as gr
from f5_tts.api import F5TTS
from huggingface_hub import hf_hub_download
import os
# Checkpoint names offered in the UI; each one is fetched as "<name>.pt".
VARIANTS = ["model_800000", "model_300000", "model_100000"]

# Loop-invariant values, resolved once instead of once per variant: the
# repository id, the access token, and the vocabulary file path.
_REPO_ID = "mrfakename/openf5-v2"
_HF_TOKEN = os.getenv("HF_TOKEN")
_VOCAB_FILE = hf_hub_download(_REPO_ID, "vocab.txt", token=_HF_TOKEN)

# Maps variant name -> F5TTS instance. Every variant is instantiated at import
# time, so `generate` only has to perform a dictionary lookup.
models = {
    variant: F5TTS(
        ckpt_file=hf_hub_download(_REPO_ID, f"{variant}.pt", token=_HF_TOKEN),
        vocab_file=_VOCAB_FILE,
    )
    for variant in VARIANTS
}
@spaces.GPU
def generate(text, ref_audio, variant, progress=gr.Progress()):
    """Synthesize ``text`` with the selected checkpoint and return a WAV path.

    Args:
        text: The text to be spoken.
        ref_audio: Path of the reference audio clip (passed as ``ref_file``).
        variant: Key into the module-level ``models`` dict.
        progress: Gradio progress tracker forwarded to ``infer``. The
            ``gr.Progress()`` default must stay in the signature; that is how
            Gradio injects the tracker for the event handler.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False`` and is therefore not removed by this function.

    Raises:
        gr.Error: If ``text`` is blank, ``ref_audio`` is missing, or
            ``variant`` is not a loaded model.
    """
    # Fail early with a user-facing message instead of an opaque error raised
    # from inside the model code.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not ref_audio:
        raise gr.Error("Please provide a reference audio clip.")
    if variant not in models:
        raise gr.Error(f"Unknown model variant: {variant!r}")

    api = models[variant]

    # Reserve a unique output path, then close the handle *before* inference:
    # `infer` writes to the path by name, and keeping our own handle open both
    # holds a descriptor for the whole run and breaks on platforms that do not
    # allow an open NamedTemporaryFile to be opened a second time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name

    api.infer(
        ref_file=ref_audio,
        # NOTE(review): empty reference text presumably makes F5-TTS transcribe
        # the reference clip itself -- confirm against the f5_tts API.
        ref_text="",
        gen_text=text,
        progress=progress,
        file_wave=out_path,
    )
    print(out_path)
    return out_path
# Build the single-page UI: text + reference clip + checkpoint choice in,
# synthesized audio out.
with gr.Blocks() as demo:
    gr.Markdown(
        "# OpenF5 TTS Demo\n\n"
        "Try out various checkpoints of the OpenF5-TTS model. "
        "The model is currently still being trained.\n\n"
        "The goal is to create a permissively-licensed F5-TTS model checkpoint "
        "trained on commercially-viable data that can be used as a base for "
        "further fine-tuning."
    )

    # Inputs, in the order `generate` expects them.
    text_input = gr.Textbox(label="Text")
    ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
    variant_picker = gr.Radio(
        label="Variant",
        choices=VARIANTS,
        value=VARIANTS[0],  # first listed checkpoint is preselected
    )

    generate_btn = gr.Button("Generate", variant="primary")
    output_audio = gr.Audio(label="Output", type="filepath")

    generate_btn.click(
        fn=generate,
        inputs=[text_input, ref_audio_input, variant_picker],
        outputs=[output_audio],
    )

# Enable request queuing, then start the server.
demo.queue()
demo.launch()