# Spaces: Running
import gradio as gr | |
import wave | |
import numpy as np | |
from io import BytesIO | |
from huggingface_hub import hf_hub_download | |
from piper import PiperVoice | |
from transformers import pipeline | |
import typing | |
# Fetch the Argentine-Spanish "daniela" voice (ONNX model plus its JSON config)
# from the Hugging Face Hub, then load it once at import time so that every
# request handled by the app reuses the same PiperVoice instance.
_VOICE_REPO_ID = "larcanio/piper-voices"
model_path = hf_hub_download(
    repo_id=_VOICE_REPO_ID,
    filename="es_AR-daniela-high.onnx",
)
config_path = hf_hub_download(
    repo_id=_VOICE_REPO_ID,
    filename="es_AR-daniela-high.json",
)
voice = PiperVoice.load(model_path, config_path)
def synthesize_speech(text):
    """Synthesize ``text`` with the loaded Piper voice and return WAV bytes.

    Args:
        text: The text to read aloud, as typed into the Gradio textbox.

    Returns:
        A ``(audio, tokens)`` pair, one item per Gradio output wired to this
        handler. ``audio`` is a complete WAV file (header included) as
        ``bytes``, written as 16-bit mono at ``voice.config.sample_rate``;
        it is ``None`` when ``text`` is empty or whitespace-only.
        ``tokens`` is always ``None``.
    """
    # Nothing to read aloud: clear the outputs instead of producing an
    # empty audio clip.
    if not text or not text.strip():
        return None, None

    # Build the WAV file entirely in memory.
    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file)

    # Read the buffer only after the ``with`` block has closed the writer,
    # so the frame count in the WAV header is final. The bytes are returned
    # as-is: routing them through a NumPy int16 array and back would yield
    # the very same bytes while misreading the header as samples.
    return buffer.getvalue(), None
# Markdown banner rendered at the top of the page (user-facing, in Spanish).
BANNER_TEXT = """
# Demo en español argentino con Piper
[***Piper***](https://huggingface.co/rhasspy/piper-voices/) es un modelo abierto de Texto a Voz (TTS)
que permite entrenarse con voz propia, destaca por no requerir conectarse a Internet y ofrecer resultados
sin exigir GPU. Inicialmente diseñado para Raspberry Pi.
Este demo solo muestra español, puedes probar [voces en otros idiomas](https://rhasspy.github.io/piper-samples/).
"""
# Page layout: banner, text input, audio player, a hidden textbox for the
# handler's second return value, and the button that triggers synthesis.
with gr.Blocks(theme=gr.themes.Base()) as blocks:
    gr.Markdown(BANNER_TEXT)
    input_text = gr.Textbox(
        label=" ",
        placeholder="Introduce el texto a leer aquí",
    )
    output_audio = gr.Audio(label="Audio generado", type="numpy")
    output_text = gr.Textbox(label="Tokens generados", visible=False)
    submit_button = gr.Button(value="Genera audio")

    # The handler returns one value per component listed in ``outputs``.
    submit_button.click(
        fn=synthesize_speech,
        inputs=input_text,
        outputs=[output_audio, output_text],
    )

# Start the web server.
blocks.launch()