waveletdeboshir committed on
Commit
4ec70b3
·
verified ·
1 Parent(s): 412ecdc

Add model files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 35,
3
+ "<s>": 34
4
+ }
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder": {
3
+ "feat_in": 64,
4
+ "n_layers": 16,
5
+ "d_model": 768,
6
+ "subsampling_factor": 4,
7
+ "ff_expansion_factor": 4,
8
+ "self_attention_model": "rotary",
9
+ "pos_emb_max_len": 5000,
10
+ "n_heads": 16,
11
+ "conv_kernel_size": 31,
12
+ "flash_attn": false
13
+ },
14
+ "head": {
15
+ "feat_in": 768,
16
+ "num_classes": 34
17
+ },
18
+ "labels": [
19
+ " ",
20
+ "а",
21
+ "б",
22
+ "в",
23
+ "г",
24
+ "д",
25
+ "е",
26
+ "ж",
27
+ "з",
28
+ "и",
29
+ "й",
30
+ "к",
31
+ "л",
32
+ "м",
33
+ "н",
34
+ "о",
35
+ "п",
36
+ "р",
37
+ "с",
38
+ "т",
39
+ "у",
40
+ "ф",
41
+ "х",
42
+ "ц",
43
+ "ч",
44
+ "ш",
45
+ "щ",
46
+ "ъ",
47
+ "ы",
48
+ "ь",
49
+ "э",
50
+ "ю",
51
+ "я"
52
+ ],
53
+ "blank_id": 33
54
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "GigaAMFeatureExtractor",
4
+ "feature_size": 64,
5
+ "hop_length": 160,
6
+ "n_samples": 480000,
7
+ "padding_side": "right",
8
+ "padding_value": 0.0,
9
+ "return_attention_mask": true,
10
+ "sampling_rate": 16000
11
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2b0fbc38a444ce502ee5d8b9e70dcf5e1fa1985379cf82205e2772467b522c
3
+ size 465499370
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "pad_token": "[BLANK]",
3
+ "unk_token": "[BLANK]"
4
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "33": {
4
+ "content": "[BLANK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ }
11
+ },
12
+ "bos_token": null,
13
+ "clean_up_tokenization_spaces": false,
14
+ "do_lower_case": false,
15
+ "eos_token": null,
16
+ "model_max_length": 1000,
17
+ "pad_token": "[BLANK]",
18
+ "replace_word_delimiter_char": " ",
19
+ "target_lang": null,
20
+ "tokenizer_class": "GigaAMCTCTokenizer",
21
+ "unk_token": "[BLANK]",
22
+ "word_delimiter_token": " "
23
+ }
vocab.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ " ": 0,
3
+ "[BLANK]": 33,
4
+ "а": 1,
5
+ "б": 2,
6
+ "в": 3,
7
+ "г": 4,
8
+ "д": 5,
9
+ "е": 6,
10
+ "ж": 7,
11
+ "з": 8,
12
+ "и": 9,
13
+ "й": 10,
14
+ "к": 11,
15
+ "л": 12,
16
+ "м": 13,
17
+ "н": 14,
18
+ "о": 15,
19
+ "п": 16,
20
+ "р": 17,
21
+ "с": 18,
22
+ "т": 19,
23
+ "у": 20,
24
+ "ф": 21,
25
+ "х": 22,
26
+ "ц": 23,
27
+ "ч": 24,
28
+ "ш": 25,
29
+ "щ": 26,
30
+ "ъ": 27,
31
+ "ы": 28,
32
+ "ь": 29,
33
+ "э": 30,
34
+ "ю": 31,
35
+ "я": 32
36
+ }