|
from typing import Annotated, Literal, Optional
|
|
|
|
from pydantic import BaseModel, Field, conint
|
|
|
|
|
|
class ServeReferenceAudio(BaseModel):
    # One reference sample: a blob of audio bytes paired with a text string.
    # ServeTTSRequest.references carries a list of these.

    # Raw audio payload; the container/encoding is not constrained here.
    audio: bytes

    # Text accompanying the audio (presumably its transcript -- confirm
    # against the code that consumes this model).
    text: str
|
|
|
|
|
|
class ServeTTSRequest(BaseModel):
    # Request schema for a text-to-speech call. Only `text` is required;
    # every other field has a default.

    # Input text for the request.
    text: str

    # Integer with declared bounds 100..300 (strict), default 200.
    # NOTE(review): the bounds are passed as `conint(...)` inside Annotated,
    # unlike the `Field(...)` form used for the sampling fields below --
    # confirm the installed pydantic version actually enforces them.
    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200

    # Output audio container; one of "wav", "pcm" or "mp3".
    format: Literal["wav", "pcm", "mp3"] = "wav"

    # This field used to be declared twice in this class: first as
    # `Literal[64, 128, 192] = 128` at this position, then again as
    # `Optional[int] = 64` further down. The later declaration replaced the
    # earlier annotation and default, so the Literal restriction was never in
    # effect. Only the effective declaration is kept, at the position of the
    # field's first appearance so the field order stays the same.
    mp3_bitrate: Optional[int] = 64

    # Reference samples supplied inline with the request (empty by default).
    references: list[ServeReferenceAudio] = []

    # Optional identifier of a reference; None when not provided.
    reference_id: str | None = None

    # Boolean switch, on by default (presumably text normalisation before
    # synthesis -- confirm with the consumer of this model).
    normalize: bool = True

    # NOTE(review): -1000 looks like a sentinel rather than a real bitrate
    # (possibly an "auto" setting of the Opus encoder) -- confirm.
    opus_bitrate: Optional[int] = -1000

    # Latency mode; one of "normal" or "balanced".
    latency: Literal["normal", "balanced"] = "normal"

    # Boolean flag, off by default (presumably selects a streamed response).
    streaming: bool = False

    # Optional free-form string; None when not provided.
    emotion: Optional[str] = None

    # Integer generation budget, default 1024; no bounds are declared.
    max_new_tokens: int = 1024

    # Sampling parameters. Each is a strict float (no coercion from other
    # types) restricted to the closed interval given in its Field(...).
    top_p: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
    repetition_penalty: Annotated[float, Field(ge=0.9, le=2.0, strict=True)] = 1.2
    temperature: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
|
|
|