Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -217,6 +217,8 @@ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000
|
|
| 217 |
return ((SAMPLE_RATE, out), ps)
|
| 218 |
|
| 219 |
with gr.Blocks() as basic_tts:
|
|
|
|
|
|
|
| 220 |
with gr.Row():
|
| 221 |
with gr.Column():
|
| 222 |
text = gr.Textbox(label='Input Text')
|
|
@@ -396,6 +398,8 @@ def extract_text(file):
|
|
| 396 |
return None
|
| 397 |
|
| 398 |
with gr.Blocks() as lf_tts:
|
|
|
|
|
|
|
| 399 |
with gr.Row():
|
| 400 |
with gr.Column():
|
| 401 |
file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
|
|
@@ -438,10 +442,36 @@ with gr.Blocks() as lf_tts:
|
|
| 438 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
| 439 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
| 440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
with gr.Blocks() as app:
|
| 442 |
gr.TabbedInterface(
|
| 443 |
-
[basic_tts, lf_tts],
|
| 444 |
-
['Basic TTS', 'Long-Form'],
|
| 445 |
)
|
| 446 |
|
| 447 |
if __name__ == '__main__':
|
|
|
|
| 217 |
return ((SAMPLE_RATE, out), ps)
|
| 218 |
|
| 219 |
with gr.Blocks() as basic_tts:
|
| 220 |
+
with gr.Row():
|
| 221 |
+
gr.Markdown('Generate speech for one segment of text (up to 510 tokens) using Kokoro, a TTS model with 80 million parameters.')
|
| 222 |
with gr.Row():
|
| 223 |
with gr.Column():
|
| 224 |
text = gr.Textbox(label='Input Text')
|
|
|
|
| 398 |
return None
|
| 399 |
|
| 400 |
with gr.Blocks() as lf_tts:
|
| 401 |
+
with gr.Row():
|
| 402 |
+
gr.Markdown('Generate speech in batches of 100 text segments and automatically join them together. This may exhaust your ZeroGPU quota.')
|
| 403 |
with gr.Row():
|
| 404 |
with gr.Column():
|
| 405 |
file_input = gr.File(file_types=['.pdf', '.txt'], label='Input File: pdf or txt')
|
|
|
|
| 442 |
segment_btn.click(segment_and_tokenize, inputs=[text, voice, skip_square_brackets, newline_split], outputs=[segments])
|
| 443 |
generate_btn.click(lf_generate, inputs=[segments, voice, speed, reduce_noise, opening_cut, closing_cut, ease_in, ease_out, pad_before, pad_after, pad_between], outputs=[audio])
|
| 444 |
|
| 445 |
+
with gr.Blocks() as api_info:
|
| 446 |
+
gr.Markdown("""
|
| 447 |
+
This Space can be used via API. The following code block can be copied and run in one Google Colab cell.
|
| 448 |
+
```
|
| 449 |
+
# 1. Install the Gradio Python client
|
| 450 |
+
!pip install -q gradio_client
|
| 451 |
+
|
| 452 |
+
# 2. Initialize the client
|
| 453 |
+
from gradio_client import Client
|
| 454 |
+
client = Client('hexgrad/Kokoro-TTS')
|
| 455 |
+
|
| 456 |
+
# 3. Call the generate endpoint, which returns a pair: an audio path and a string of output phonemes
|
| 457 |
+
audio_path, out_ps = client.predict(
|
| 458 |
+
text="How could I know? It's an unanswerable question. Like asking an unborn child if they'll lead a good life. They haven't even been born.",
|
| 459 |
+
voice='af_0',
|
| 460 |
+
api_name='/generate'
|
| 461 |
+
)
|
| 462 |
+
|
| 463 |
+
# 4. Display the audio and print the output phonemes
|
| 464 |
+
from IPython.display import display, Audio
|
| 465 |
+
display(Audio(audio_path))
|
| 466 |
+
print(out_ps)
|
| 467 |
+
```
|
| 468 |
+
Note that this Space and the underlying Kokoro model are both under development and subject to change. API reliability is not guaranteed. Also, Hugging Face and/or Gradio might enforce rate limits.
|
| 469 |
+
""")
|
| 470 |
+
|
| 471 |
with gr.Blocks() as app:
|
| 472 |
gr.TabbedInterface(
|
| 473 |
+
[basic_tts, lf_tts, api_info],
|
| 474 |
+
['Basic TTS', 'Long-Form', 'Gradio API'],
|
| 475 |
)
|
| 476 |
|
| 477 |
if __name__ == '__main__':
|