Upload config.json with huggingface_hub
Browse files- config.json +50 -0
config.json
ADDED
@@ -0,0 +1,50 @@
{
  "model_type": "vui",
  "library_name": "vui",
  "pipeline_tag": "text-to-speech",
  "license": "mit",
  "language": ["en"],
  "architectures": ["VuiForConditionalGeneration"],
  "model_files": {
    "base": "vui-100m-base.pt",
    "abraham": "vui-abraham-100m.pt",
    "cohost": "vui-cohost-100m.pt",
    "cohost_alt": "ckpts-vui-cohost-100m.pt",
    "tokenizer": "fluac-22hz-22khz.pt"
  },
  "model_variants": {
    "vui-100m-base": {
      "description": "Base checkpoint trained on 40k hours of audio conversations",
      "file": "vui-100m-base.pt",
      "size_mb": 198
    },
    "vui-abraham-100m": {
      "description": "Single speaker model with context awareness",
      "file": "vui-abraham-100m.pt",
      "size_mb": 198
    },
    "vui-cohost-100m": {
      "description": "Two speakers that can interact with each other",
      "file": "vui-cohost-100m.pt",
      "size_mb": 198
    }
  },
  "tokenizer_config": {
    "audio_tokenizer": "fluac",
    "sample_rate": "22khz",
    "file": "fluac-22hz-22khz.pt",
    "size_mb": 307
  },
  "training_data": {
    "hours": 40000,
    "type": "audio_conversations"
  },
  "capabilities": [
    "text-to-speech",
    "conversational-speech",
    "voice-cloning",
    "on-device-inference"
  ],
  "torch_dtype": "float32",
  "framework": "pytorch"
}