{
  "model_type": "vui",
  "library_name": "vui",
  "pipeline_tag": "text-to-speech",
  "license": "mit",
  "language": ["en"],
  "architectures": ["VuiForConditionalGeneration"],
  "model_files": {
    "base": "vui-100m-base.pt",
    "abraham": "vui-abraham-100m.pt",
    "cohost": "vui-cohost-100m.pt",
    "cohost_alt": "ckpts-vui-cohost-100m.pt",
    "tokenizer": "fluac-22hz-22khz.pt"
  },
  "model_variants": {
    "vui-100m-base": {
      "description": "Base checkpoint trained on 40k hours of audio conversations",
      "file": "vui-100m-base.pt",
      "size_mb": 198
    },
    "vui-abraham-100m": {
      "description": "Single speaker model with context awareness",
      "file": "vui-abraham-100m.pt",
      "size_mb": 198
    },
    "vui-cohost-100m": {
      "description": "Two speakers that can interact with each other",
      "file": "vui-cohost-100m.pt",
      "size_mb": 198
    }
  },
  "tokenizer_config": {
    "audio_tokenizer": "fluac",
    "sample_rate": "22khz",
    "file": "fluac-22hz-22khz.pt",
    "size_mb": 307
  },
  "training_data": {
    "hours": 40000,
    "type": "audio_conversations"
  },
  "capabilities": [
    "text-to-speech",
    "conversational-speech",
    "voice-cloning",
    "on-device-inference"
  ],
  "torch_dtype": "float32",
  "framework": "pytorch"
}