# app.py
"""Gradio demo for IndexTTS-1.5 voice cloning.

At import time the model checkpoint is downloaded into ``checkpoints/`` and
the UI is built. The model itself is created lazily on the first request.
"""
import os
import tempfile

# Kept ahead of torch, matching the original import order.
# NOTE(review): presumably ZeroGPU needs `spaces` imported before anything
# initializes CUDA -- confirm before reordering.
import spaces
import gradio as gr
import torch
from huggingface_hub import snapshot_download
from indextts.infer import IndexTTS

model_dir = snapshot_download(
    "IndexTeam/IndexTTS-1.5",
    local_dir="checkpoints",
    local_dir_use_symlinks=False,
)
cfg_path = os.path.join(model_dir, "config.yaml")

# Created on first use inside `synth` and then reused by later calls.
tts = None


@spaces.GPU
def synth(audio_path, text):
    """Synthesize *text* in the voice of the reference clip.

    Args:
        audio_path: Filesystem path of the reference voice recording, or
            ``None`` when the user has not supplied one.
        text: The text to speak.

    Returns:
        Path of the generated WAV file. Every call writes to its own
        temporary file so overlapping requests cannot overwrite each other.

    Raises:
        gr.Error: If the reference clip or the text prompt is missing.
    """
    global tts

    # Fail early with a readable message instead of an opaque error from
    # inside the model code.
    if not audio_path:
        raise gr.Error("please provide a reference voice clip")
    if not text or not text.strip():
        raise gr.Error("please enter some text")

    if tts is None:
        tts = IndexTTS(model_dir=model_dir, cfg_path=cfg_path)
        # NOTE(review): IndexTTS may not expose `.to()`; the call is guarded
        # so a missing method does not abort the request -- confirm against
        # the indextts API and drop whichever branch is dead.
        if torch.cuda.is_available() and callable(getattr(tts, "to", None)):
            tts.to("cuda")

    # A fixed "output.wav" would be shared by every request; reserve a unique
    # path per call instead. The descriptor is closed right away because only
    # the name is handed to the model.
    fd, out_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    tts.infer(audio_path, text, out_path)
    return out_path


with gr.Blocks() as demo:
    gr.Markdown("# index-tts 1.5 (english)")
    text_in = gr.Textbox(label="text prompt")
    audio_in = gr.Audio(label="reference voice", type="filepath")
    audio_out = gr.Audio(label="generated speech", type="filepath")
    gr.Button("generate").click(synth, [audio_in, text_in], audio_out)

if __name__ == "__main__":
    demo.launch()