vui / config.json
harrycb's picture
Upload config.json with huggingface_hub
8dc2bd9 verified
{
"model_type": "vui",
"library_name": "vui",
"pipeline_tag": "text-to-speech",
"license": "mit",
"language": ["en"],
"architectures": ["VuiForConditionalGeneration"],
"model_files": {
"base": "vui-100m-base.pt",
"abraham": "vui-abraham-100m.pt",
"cohost": "vui-cohost-100m.pt",
"cohost_alt": "ckpts-vui-cohost-100m.pt",
"tokenizer": "fluac-22hz-22khz.pt"
},
"model_variants": {
"vui-100m-base": {
"description": "Base checkpoint trained on 40k hours of audio conversations",
"file": "vui-100m-base.pt",
"size_mb": 198
},
"vui-abraham-100m": {
"description": "Single speaker model with context awareness",
"file": "vui-abraham-100m.pt",
"size_mb": 198
},
"vui-cohost-100m": {
"description": "Two speakers that can interact with each other",
"file": "vui-cohost-100m.pt",
"size_mb": 198
}
},
"tokenizer_config": {
"audio_tokenizer": "fluac",
"sample_rate": "22khz",
"file": "fluac-22hz-22khz.pt",
"size_mb": 307
},
"training_data": {
"hours": 40000,
"type": "audio_conversations"
},
"capabilities": [
"text-to-speech",
"conversational-speech",
"voice-cloning",
"on-device-inference"
],
"torch_dtype": "float32",
"framework": "pytorch"
}