mms-lid-256 / config.json
patrickvonplaten's picture
make style
edc73fd
{
"_name_or_path": "mms-lid-256",
"activation_dropout": 0.05,
"adapter_attn_dim": 16,
"adapter_kernel_size": 3,
"adapter_stride": 2,
"add_adapter": false,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2ForSequenceClassification"
],
"attention_dropout": 0.05,
"bos_token_id": 1,
"classifier_proj_size": 1024,
"codevector_dim": 1024,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "mean",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.05,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.05,
"hidden_act": "gelu",
"hidden_dropout": 0.05,
"hidden_size": 1280,
"id2label": {
"0": "ara",
"1": "cmn",
"2": "eng",
"3": "spa",
"4": "fra",
"5": "mlg",
"6": "swe",
"7": "por",
"8": "vie",
"9": "ful",
"10": "sun",
"11": "asm",
"12": "ben",
"13": "zlm",
"14": "kor",
"15": "ind",
"16": "hin",
"17": "tuk",
"18": "urd",
"19": "aze",
"20": "slv",
"21": "mon",
"22": "hau",
"23": "tel",
"24": "swh",
"25": "bod",
"26": "rus",
"27": "tur",
"28": "heb",
"29": "mar",
"30": "som",
"31": "tgl",
"32": "tat",
"33": "tha",
"34": "cat",
"35": "ron",
"36": "mal",
"37": "bel",
"38": "pol",
"39": "yor",
"40": "nld",
"41": "bul",
"42": "hat",
"43": "afr",
"44": "isl",
"45": "amh",
"46": "tam",
"47": "hun",
"48": "hrv",
"49": "lit",
"50": "cym",
"51": "fas",
"52": "mkd",
"53": "ell",
"54": "bos",
"55": "deu",
"56": "sqi",
"57": "jav",
"58": "kmr",
"59": "nob",
"60": "uzb",
"61": "snd",
"62": "lat",
"63": "nya",
"64": "grn",
"65": "mya",
"66": "orm",
"67": "lin",
"68": "hye",
"69": "yue",
"70": "pan",
"71": "jpn",
"72": "kaz",
"73": "npi",
"74": "kik",
"75": "kat",
"76": "guj",
"77": "kan",
"78": "tgk",
"79": "ukr",
"80": "ces",
"81": "lav",
"82": "bak",
"83": "khm",
"84": "fao",
"85": "glg",
"86": "ltz",
"87": "xog",
"88": "lao",
"89": "mlt",
"90": "sin",
"91": "aka",
"92": "sna",
"93": "ita",
"94": "srp",
"95": "mri",
"96": "nno",
"97": "pus",
"98": "eus",
"99": "ory",
"100": "lug",
"101": "bre",
"102": "luo",
"103": "slk",
"104": "ewe",
"105": "fin",
"106": "rif",
"107": "dan",
"108": "yid",
"109": "yao",
"110": "mos",
"111": "hne",
"112": "est",
"113": "dyu",
"114": "bam",
"115": "uig",
"116": "sck",
"117": "tso",
"118": "mup",
"119": "ctg",
"120": "ceb",
"121": "war",
"122": "bbc",
"123": "vmw",
"124": "sid",
"125": "tpi",
"126": "mag",
"127": "san",
"128": "kri",
"129": "lon",
"130": "kir",
"131": "run",
"132": "ubl",
"133": "kin",
"134": "rkt",
"135": "xmm",
"136": "tir",
"137": "mai",
"138": "nan",
"139": "nyn",
"140": "bcc",
"141": "hak",
"142": "suk",
"143": "bem",
"144": "rmy",
"145": "awa",
"146": "pcm",
"147": "bgc",
"148": "shn",
"149": "oci",
"150": "wol",
"151": "bci",
"152": "kab",
"153": "ilo",
"154": "bcl",
"155": "haw",
"156": "mad",
"157": "nod",
"158": "sag",
"159": "sas",
"160": "jam",
"161": "mey",
"162": "shi",
"163": "hil",
"164": "ace",
"165": "kam",
"166": "min",
"167": "umb",
"168": "hno",
"169": "ban",
"170": "syl",
"171": "bxg",
"172": "xho",
"173": "mww",
"174": "epo",
"175": "tzm",
"176": "zul",
"177": "ibo",
"178": "abk",
"179": "guz",
"180": "ckb",
"181": "knc",
"182": "nso",
"183": "bho",
"184": "dje",
"185": "tiv",
"186": "gle",
"187": "lua",
"188": "skr",
"189": "bto",
"190": "kea",
"191": "glk",
"192": "ast",
"193": "sat",
"194": "ktu",
"195": "bhb",
"196": "emk",
"197": "kng",
"198": "kmb",
"199": "tsn",
"200": "gom",
"201": "ven",
"202": "sco",
"203": "glv",
"204": "sot",
"205": "sou",
"206": "gno",
"207": "nde",
"208": "bjn",
"209": "ina",
"210": "fmu",
"211": "esg",
"212": "wes",
"213": "pnb",
"214": "phr",
"215": "mui",
"216": "bug",
"217": "mrr",
"218": "kas",
"219": "lir",
"220": "vah",
"221": "ssw",
"222": "rwr",
"223": "pcc",
"224": "hms",
"225": "wbr",
"226": "swv",
"227": "mtr",
"228": "haz",
"229": "aii",
"230": "bns",
"231": "msi",
"232": "wuu",
"233": "hsn",
"234": "bgp",
"235": "tts",
"236": "lmn",
"237": "dcc",
"238": "bew",
"239": "bjj",
"240": "ibb",
"241": "tji",
"242": "hoj",
"243": "cpx",
"244": "cdo",
"245": "daq",
"246": "mut",
"247": "nap",
"248": "czh",
"249": "gdx",
"250": "sdh",
"251": "scn",
"252": "mnp",
"253": "bar",
"254": "mzn",
"255": "gsw"
},
"initializer_range": 0.02,
"intermediate_size": 5120,
"label2id": null,
"layer_norm_eps": 1e-05,
"layerdrop": 0.05,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"model_type": "wav2vec2",
"num_adapter_layers": 3,
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 48,
"num_negatives": 100,
"output_hidden_size": 1280,
"pad_token_id": 0,
"proj_codevector_dim": 1024,
"tdnn_dilation": [
1,
2,
3,
1,
1
],
"tdnn_dim": [
512,
512,
512,
512,
1500
],
"tdnn_kernel": [
5,
3,
3,
1,
1
],
"torch_dtype": "float32",
"transformers_version": "4.31.0.dev0",
"use_weighted_layer_sum": false,
"vocab_size": 154,
"xvector_output_dim": 512
}