File size: 2,656 Bytes
8d7f55c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import io
import struct
from typing import AsyncGenerator
from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.services.ai_services import TTSService
from loguru import logger
try:
from pyht.client import TTSOptions
from pyht.async_client import AsyncClient
from pyht.protos.api_pb2 import Format
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error(
"In order to use PlayHT, you need to `pip install pipecat-ai[playht]`. Also, set `PLAY_HT_USER_ID` and `PLAY_HT_API_KEY` environment variables.")
raise Exception(f"Missing module: {e}")
class PlayHTTTSService(TTSService):
def __init__(self, *, api_key: str, user_id: str, voice_url: str, **kwargs):
super().__init__(**kwargs)
self._user_id = user_id
self._speech_key = api_key
self._client = AsyncClient(
user_id=self._user_id,
api_key=self._speech_key,
)
self._options = TTSOptions(
voice=voice_url,
sample_rate=16000,
quality="higher",
format=Format.FORMAT_WAV)
def can_generate_metrics(self) -> bool:
return True
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: [{text}]")
try:
b = bytearray()
in_header = True
await self.start_ttfb_metrics()
playht_gen = self._client.tts(
text,
voice_engine="PlayHT2.0-turbo",
options=self._options)
async for chunk in playht_gen:
# skip the RIFF header.
if in_header:
b.extend(chunk)
if len(b) <= 36:
continue
else:
fh = io.BytesIO(b)
fh.seek(36)
(data, size) = struct.unpack('<4sI', fh.read(8))
while data != b'data':
fh.read(size)
(data, size) = struct.unpack('<4sI', fh.read(8))
in_header = False
else:
if len(chunk):
await self.stop_ttfb_metrics()
frame = AudioRawFrame(chunk, 16000, 1)
yield frame
except Exception as e:
logger.exception(f"{self} error generating TTS: {e}")
|