diff --git a/data/Abkhaz-Adyghe.json b/data/Abkhaz-Adyghe.json index 8cd865c2362e03fa4942b9b010d679f0084c7bbb..86231fd87f608137e0ec3f745a126d6ab2045159 100644 --- a/data/Abkhaz-Adyghe.json +++ b/data/Abkhaz-Adyghe.json @@ -2,75 +2,101 @@ "name": "Abkhaz-Adyghe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abkhaz-Abazin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abkhaz", "iso_1_code": "ab", "iso_3_code": "abk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Abaza", "iso_1_code": null, "iso_3_code": "abq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1", + "scripts": [], + "own_tokenizer": false }, { "name": "Circassian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Adyghe", "iso_1_code": null, "iso_3_code": "ady", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Kabardian", "iso_1_code": null, "iso_3_code": "kbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubyx", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ubykh", "iso_1_code": null, "iso_3_code": "uby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "0", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Afro-Asiatic.json b/data/Afro-Asiatic.json index 63bb1d89b0507996f0f8d7bb3ab2e1ad06472958..28175f6f2f66860b4dbf855eac709fb5187c0572 100644 --- a/data/Afro-Asiatic.json +++ b/data/Afro-Asiatic.json @@ -2,6171 +2,14680 @@ "name": "Afro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Berber", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Guanche", "iso_1_code": null, "iso_3_code": "gnc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "11", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awjila-Sokna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awjilah", "iso_1_code": null, "iso_3_code": "auj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "14", + "scripts": [], + "own_tokenizer": false }, { "name": "Sawknah", "iso_1_code": null, "iso_3_code": "swn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "15", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "13", + "scripts": [], + "own_tokenizer": false }, { "name": "Siwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Siwi", "iso_1_code": null, "iso_3_code": "siz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "17", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "16", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "12", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chenoua", "iso_1_code": null, "iso_3_code": "cnu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "19", + "scripts": [], + "own_tokenizer": false }, { "name": "Atlas", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Judeo-Berber", "iso_1_code": null, "iso_3_code": "jbe", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "21", + "scripts": [], + "own_tokenizer": false }, { "name": "Tachelhit", "iso_1_code": null, "iso_3_code": "shi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "22", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tamazight, Central Atlas", "iso_1_code": null, "iso_3_code": "tzm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "23", + "scripts": [ + "Tfng" + ], + "own_tokenizer": false }, { "name": "Tamazight, Standard Moroccan", "iso_1_code": null, "iso_3_code": "zgh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "24", + "scripts": [ + "Tfng" + ], + "own_tokenizer": false } - ] + ], + "node_i": "20", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabyle", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kabyle", "iso_1_code": null, "iso_3_code": "kab", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "26", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "25", + "scripts": [], + "own_tokenizer": false }, { "name": "Zenati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ghadam\u00e8s", "iso_1_code": null, "iso_3_code": "gha", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "29", + "scripts": [], + "own_tokenizer": false }, { "name": "Nafusi", "iso_1_code": null, "iso_3_code": "jbn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "30", + "scripts": [], + "own_tokenizer": false }, { "name": "Sened", "iso_1_code": null, "iso_3_code": "sds", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "31", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "28", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghomara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ghomara", "iso_1_code": null, "iso_3_code": "gho", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "33", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "32", + "scripts": [], + "own_tokenizer": false }, { "name": "Mzab-Wargla", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Taznatit", "iso_1_code": null, "iso_3_code": "grr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "35", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumzabt", "iso_1_code": null, "iso_3_code": "mzb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "36", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagargrent", "iso_1_code": null, "iso_3_code": "oua", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "37", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamazight, Temacine", "iso_1_code": null, "iso_3_code": "tjo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "38", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "34", + "scripts": [], + "own_tokenizer": false }, { "name": "Riff", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tarifit", "iso_1_code": null, "iso_3_code": "rif", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "40", + "scripts": [], + "own_tokenizer": false }, { "name": "Senhaja Berber", "iso_1_code": null, "iso_3_code": "sjs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "41", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "39", + "scripts": [], + "own_tokenizer": false }, { "name": "Shawiya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tachawit", "iso_1_code": null, "iso_3_code": "shy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "43", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "42", + "scripts": [], + "own_tokenizer": false }, { "name": "Tidikelt", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tamazight, Tidikelt", "iso_1_code": null, "iso_3_code": "tia", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "45", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "44", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "27", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "18", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamasheq", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tamahaq, Tahaggart", "iso_1_code": null, "iso_3_code": "thv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "48", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "47", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tamasheq", "iso_1_code": null, "iso_3_code": "taq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "50", + "scripts": [ + "Latn", + "Tfng" + ], + "own_tokenizer": false }, { "name": "Tamajeq, Tayart", "iso_1_code": null, "iso_3_code": "thz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "51", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamajaq, Tawallammat", "iso_1_code": null, "iso_3_code": "ttq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "52", + "scripts": [ + "Latn", + "Tfng" + ], + "own_tokenizer": false } - ] + ], + "node_i": "49", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "46", + "scripts": [], + "own_tokenizer": false }, { "name": "Zenaga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tetserret", "iso_1_code": null, "iso_3_code": "tez", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "54", + "scripts": [], + "own_tokenizer": false }, { "name": "Zenaga", "iso_1_code": null, "iso_3_code": "zen", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "55", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "53", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10", + "scripts": [], + "own_tokenizer": false }, { "name": "Chadic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Biu-Mandara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boga", "iso_1_code": null, "iso_3_code": "bvw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "61", + "scripts": [], + "own_tokenizer": false }, { "name": "Ga\u2019anda", "iso_1_code": null, "iso_3_code": "gqa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "62", + "scripts": [], + "own_tokenizer": false }, { "name": "Hwana", "iso_1_code": null, "iso_3_code": "hwo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "63", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "60", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jara", "iso_1_code": null, "iso_3_code": "jaf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "65", + "scripts": [], + "own_tokenizer": false }, { "name": "Tera", "iso_1_code": null, "iso_3_code": "ttr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "66", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "64", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "59", + "scripts": [], + "own_tokenizer": false }, { "name": "A.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nggwahyi", "iso_1_code": null, "iso_3_code": "ngx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "68", + "scripts": [], + "own_tokenizer": false }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bura-Pabir", "iso_1_code": null, "iso_3_code": "bwr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "70", + "scripts": [], + "own_tokenizer": false }, { "name": "Kibaku", "iso_1_code": null, "iso_3_code": "ckl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "71", + "scripts": [], + "own_tokenizer": false }, { "name": "Kofa", "iso_1_code": null, "iso_3_code": "kso", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "72", + "scripts": [], + "own_tokenizer": false }, { "name": "Putai", "iso_1_code": null, "iso_3_code": "mfl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "73", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "69", + "scripts": [], + "own_tokenizer": false }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nya Huba", "iso_1_code": null, "iso_3_code": "hbb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "75", + "scripts": [], + "own_tokenizer": false }, { "name": "Marghi South", "iso_1_code": null, "iso_3_code": "mfm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "76", + "scripts": [], + "own_tokenizer": false }, { "name": "Marghi Central", "iso_1_code": null, "iso_3_code": "mrt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "77", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "74", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "67", + "scripts": [], + "own_tokenizer": false }, { "name": "A.3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bana", "iso_1_code": null, "iso_3_code": "bcw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "79", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kirya-Konzel", "iso_1_code": null, "iso_3_code": "fkk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "80", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamwe", "iso_1_code": null, "iso_3_code": "hig", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "81", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hya", "iso_1_code": null, "iso_3_code": "hya", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "82", + "scripts": [], + "own_tokenizer": false }, { "name": "Psikye", "iso_1_code": null, "iso_3_code": "kvj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "83", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "78", + "scripts": [], + "own_tokenizer": false }, { "name": "A.4", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lamang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lamang", "iso_1_code": null, "iso_3_code": "hia", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "86", + "scripts": [], + "own_tokenizer": false }, { "name": "Vemgo-Mabas", "iso_1_code": null, "iso_3_code": "vem", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "87", + "scripts": [], + "own_tokenizer": false }, { "name": "Hdi", "iso_1_code": null, "iso_3_code": "xed", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "88", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "85", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandara Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Glavda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cineni", "iso_1_code": null, "iso_3_code": "cie", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "91", + "scripts": [], + "own_tokenizer": false }, { "name": "Dghwede", "iso_1_code": null, "iso_3_code": "dgh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "92", + "scripts": [], + "own_tokenizer": false }, { "name": "Guduf-Gava", "iso_1_code": null, "iso_3_code": "gdf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "93", + "scripts": [], + "own_tokenizer": false }, { "name": "Glavda", "iso_1_code": null, "iso_3_code": "glw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "94", + "scripts": [], + "own_tokenizer": false }, { "name": "Gvoko", "iso_1_code": null, "iso_3_code": "ngs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "95", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "90", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wandala", "iso_1_code": null, "iso_3_code": "mfi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "97", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "96", + "scripts": [], + "own_tokenizer": false }, { "name": "Podoko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Parkwa", "iso_1_code": null, "iso_3_code": "pbi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "99", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "98", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "89", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "84", + "scripts": [], + "own_tokenizer": false }, { "name": "A.5", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baldemu", "iso_1_code": null, "iso_3_code": "bdn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "101", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuvok", "iso_1_code": null, "iso_3_code": "cuv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "102", + "scripts": [], + "own_tokenizer": false }, { "name": "Dugwor", "iso_1_code": null, "iso_3_code": "dme", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "103", + "scripts": [], + "own_tokenizer": false }, { "name": "Giziga, North", "iso_1_code": null, "iso_3_code": "gis", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "104", + "scripts": [], + "own_tokenizer": false }, { "name": "Giziga", "iso_1_code": null, "iso_3_code": "giz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "105", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zulgo-Gemzek", "iso_1_code": null, "iso_3_code": "gnd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "106", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mafa", "iso_1_code": null, "iso_3_code": "maf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "107", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Merey", "iso_1_code": null, "iso_3_code": "meq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "108", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Matal", "iso_1_code": null, "iso_3_code": "mfh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "109", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mefele", "iso_1_code": null, "iso_3_code": "mfj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "110", + "scripts": [], + "own_tokenizer": false }, { "name": "Mofu, North", "iso_1_code": null, "iso_3_code": "mfk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "111", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mofu-Gudur", "iso_1_code": null, "iso_3_code": "mif", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "112", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vame", "iso_1_code": null, "iso_3_code": "mlr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "113", + "scripts": [], + "own_tokenizer": false }, { "name": "Moloko", "iso_1_code": null, "iso_3_code": "mlw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "114", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbuko", "iso_1_code": null, "iso_3_code": "mqb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "115", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Muyang", "iso_1_code": null, "iso_3_code": "muy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "116", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mada", "iso_1_code": null, "iso_3_code": "mxu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "117", + "scripts": [], + "own_tokenizer": false }, { "name": "Wuzlam", "iso_1_code": null, "iso_3_code": "udl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "118", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "100", + "scripts": [], + "own_tokenizer": false }, { "name": "A.6", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sukur", "iso_1_code": null, "iso_3_code": "syk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "120", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "119", + "scripts": [], + "own_tokenizer": false }, { "name": "A.7", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buwal", "iso_1_code": null, "iso_3_code": "bhs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "122", + "scripts": [], + "own_tokenizer": false }, { "name": "Daba", "iso_1_code": null, "iso_3_code": "dbq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "123", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazagway-Hidi", "iso_1_code": null, "iso_3_code": "dkx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "124", + "scripts": [], + "own_tokenizer": false }, { "name": "Gavar", "iso_1_code": null, "iso_3_code": "gou", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "125", + "scripts": [], + "own_tokenizer": false }, { "name": "Mina", "iso_1_code": null, "iso_3_code": "hna", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "126", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbudum", "iso_1_code": null, "iso_3_code": "xmd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "127", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "121", + "scripts": [], + "own_tokenizer": false }, { "name": "A.8", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bacama", "iso_1_code": null, "iso_3_code": "bcy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "129", + "scripts": [], + "own_tokenizer": false }, { "name": "Bata", "iso_1_code": null, "iso_3_code": "bta", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "130", + "scripts": [], + "own_tokenizer": false }, { "name": "Fali Muchella", "iso_1_code": null, "iso_3_code": "fli", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "131", + "scripts": [], + "own_tokenizer": false }, { "name": "Gude", "iso_1_code": null, "iso_3_code": "gde", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "132", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gudu", "iso_1_code": null, "iso_3_code": "gdu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "133", + "scripts": [], + "own_tokenizer": false }, { "name": "Holma", "iso_1_code": null, "iso_3_code": "hod", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "134", + "scripts": [], + "own_tokenizer": false }, { "name": "Jimjimen", "iso_1_code": null, "iso_3_code": "jim", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "135", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngwaba", "iso_1_code": null, "iso_3_code": "ngw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "136", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzanyi", "iso_1_code": null, "iso_3_code": "nja", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "137", + "scripts": [], + "own_tokenizer": false }, { "name": "Sharwa", "iso_1_code": null, "iso_3_code": "swq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "138", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsuvan", "iso_1_code": null, "iso_3_code": "tsh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "139", + "scripts": [], + "own_tokenizer": false }, { "name": "Zizilivakan", "iso_1_code": null, "iso_3_code": "ziz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "140", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "128", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "58", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Auyokawa", "iso_1_code": null, "iso_3_code": "auo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "143", + "scripts": [], + "own_tokenizer": false }, { "name": "Jilbe", "iso_1_code": null, "iso_3_code": "jie", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "144", + "scripts": [], + "own_tokenizer": false }, { "name": "Buduma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buduma", "iso_1_code": null, "iso_3_code": "bdm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "146", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "145", + "scripts": [], + "own_tokenizer": false }, { "name": "Jina", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jina", "iso_1_code": null, "iso_3_code": "jia", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "148", + "scripts": [], + "own_tokenizer": false }, { "name": "Majera", "iso_1_code": null, "iso_3_code": "xmj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "149", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "147", + "scripts": [], + "own_tokenizer": false }, { "name": "Kotoko Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Afade", "iso_1_code": null, "iso_3_code": "aal", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "152", + "scripts": [], + "own_tokenizer": false }, { "name": "Mpade", "iso_1_code": null, "iso_3_code": "mpi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "153", + "scripts": [], + "own_tokenizer": false }, { "name": "Maslam", "iso_1_code": null, "iso_3_code": "msv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "154", + "scripts": [], + "own_tokenizer": false }, { "name": "Malgbe", "iso_1_code": null, "iso_3_code": "mxf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "151", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lagwan", "iso_1_code": null, "iso_3_code": "kot", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "157", + "scripts": [], + "own_tokenizer": false }, { "name": "Mser", "iso_1_code": null, "iso_3_code": "kqx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "158", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "156", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "150", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "142", + "scripts": [], + "own_tokenizer": false }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Muskum", "iso_1_code": null, "iso_3_code": "mje", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "160", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbara", "iso_1_code": null, "iso_3_code": "mpk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "161", + "scripts": [], + "own_tokenizer": false }, { "name": "Musgu", "iso_1_code": null, "iso_3_code": "mug", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "162", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "159", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "141", + "scripts": [], + "own_tokenizer": false }, { "name": "C", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gidar", "iso_1_code": null, "iso_3_code": "gid", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "164", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "163", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "57", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buso", "iso_1_code": null, "iso_3_code": "bso", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "168", + "scripts": [], + "own_tokenizer": false }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mulgi", "iso_1_code": null, "iso_3_code": "mvh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "170", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndam", "iso_1_code": null, "iso_3_code": "ndm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "171", + "scripts": [], + "own_tokenizer": false }, { "name": "Soumraye", "iso_1_code": null, "iso_3_code": "sor", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "172", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumak", "iso_1_code": null, "iso_3_code": "tmc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "173", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "169", + "scripts": [], + "own_tokenizer": false }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boor", "iso_1_code": null, "iso_3_code": "bvf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "175", + "scripts": [], + "own_tokenizer": false }, { "name": "Gadang", "iso_1_code": null, "iso_3_code": "gdk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "176", + "scripts": [], + "own_tokenizer": false }, { "name": "Miltu", "iso_1_code": null, "iso_3_code": "mlj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "177", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarua", "iso_1_code": null, "iso_3_code": "swy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "178", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "174", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "167", + "scripts": [], + "own_tokenizer": false }, { "name": "A.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kimr\u00e9", "iso_1_code": null, "iso_3_code": "kqp", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "181", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "lln", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "182", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nancere", "iso_1_code": null, "iso_3_code": "nnc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "183", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "180", + "scripts": [], + "own_tokenizer": false }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gabri", "iso_1_code": null, "iso_3_code": "gab", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "185", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabalai", "iso_1_code": null, "iso_3_code": "kvf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "186", + "scripts": [], + "own_tokenizer": false }, { "name": "Tobanga", "iso_1_code": null, "iso_3_code": "tng", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "187", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "184", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "179", + "scripts": [], + "own_tokenizer": false }, { "name": "A.3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kera", "iso_1_code": null, "iso_3_code": "ker", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "189", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwang", "iso_1_code": null, "iso_3_code": "kvi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "190", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "188", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "166", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bidiyo", "iso_1_code": null, "iso_3_code": "bid", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "194", + "scripts": [], + "own_tokenizer": false }, { "name": "Dangal\u00e9at", "iso_1_code": null, "iso_3_code": "daa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "195", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jonkor Bourmataguil", "iso_1_code": null, "iso_3_code": "jeu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "196", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawa", "iso_1_code": null, "iso_3_code": "mcw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "197", + "scripts": [], + "own_tokenizer": false }, { "name": "Migaama", "iso_1_code": null, "iso_3_code": "mmy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "198", + "scripts": [], + "own_tokenizer": false }, { "name": "Mogum", "iso_1_code": null, "iso_3_code": "mou", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "199", + "scripts": [], + "own_tokenizer": false }, { "name": "Mabire", "iso_1_code": null, "iso_3_code": "muj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "200", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubi", "iso_1_code": null, "iso_3_code": "ubi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "193", + "scripts": [], + "own_tokenizer": false }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Birgit", "iso_1_code": null, "iso_3_code": "btf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "203", + "scripts": [], + "own_tokenizer": false }, { "name": "Kajakse", "iso_1_code": null, "iso_3_code": "ckq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "204", + "scripts": [], + "own_tokenizer": false }, { "name": "Masmaje", "iso_1_code": null, "iso_3_code": "mes", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "205", + "scripts": [], + "own_tokenizer": false }, { "name": "Mubi", "iso_1_code": null, "iso_3_code": "mub", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "206", + "scripts": [], + "own_tokenizer": false }, { "name": "Toram", "iso_1_code": null, "iso_3_code": "trj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "207", + "scripts": [], + "own_tokenizer": false }, { "name": "Zerenkel", "iso_1_code": null, "iso_3_code": "zrn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "208", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "202", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "192", + "scripts": [], + "own_tokenizer": false }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mukulu", "iso_1_code": null, "iso_3_code": "moz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "210", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "209", + "scripts": [], + "own_tokenizer": false }, { "name": "B.3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barein", "iso_1_code": null, "iso_3_code": "bva", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "212", + "scripts": [], + "own_tokenizer": false }, { "name": "Saba", "iso_1_code": null, "iso_3_code": "saa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "213", + "scripts": [], + "own_tokenizer": false }, { "name": "Sokoro", "iso_1_code": null, "iso_3_code": "sok", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "214", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamki", "iso_1_code": null, "iso_3_code": "tax", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "215", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "191", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "165", + "scripts": [], + "own_tokenizer": false }, { "name": "Masa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Herd\u00e9", "iso_1_code": null, "iso_3_code": "hed", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "217", + "scripts": [], + "own_tokenizer": false }, { "name": "P\u00e9v\u00e9", "iso_1_code": null, "iso_3_code": "lme", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "218", + "scripts": [], + "own_tokenizer": false }, { "name": "Masana", "iso_1_code": null, "iso_3_code": "mcn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "219", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Marba", "iso_1_code": null, "iso_3_code": "mpg", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "220", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Musey", "iso_1_code": null, "iso_3_code": "mse", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "221", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngete", "iso_1_code": null, "iso_3_code": "nnn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "222", + "scripts": [], + "own_tokenizer": false }, { "name": "Mesme", "iso_1_code": null, "iso_3_code": "zim", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "223", + "scripts": [], + "own_tokenizer": false }, { "name": "Zumaya", "iso_1_code": null, "iso_3_code": "zuy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "224", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "216", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gwandara", "iso_1_code": null, "iso_3_code": "gwn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "228", + "scripts": [], + "own_tokenizer": false }, { "name": "Hausa", "iso_1_code": "ha", "iso_3_code": "hau", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "229", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "227", + "scripts": [], + "own_tokenizer": false }, { "name": "A.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bole Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bole", "iso_1_code": null, "iso_3_code": "bol", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "233", + "scripts": [], + "own_tokenizer": false }, { "name": "Bure", "iso_1_code": null, "iso_3_code": "bvh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "234", + "scripts": [], + "own_tokenizer": false }, { "name": "Beele", "iso_1_code": null, "iso_3_code": "bxq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "235", + "scripts": [], + "own_tokenizer": false }, { "name": "Deno", "iso_1_code": null, "iso_3_code": "dbb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "236", + "scripts": [], + "own_tokenizer": false }, { "name": "Daza", "iso_1_code": null, "iso_3_code": "dzd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "237", + "scripts": [], + "own_tokenizer": false }, { "name": "Geruma", "iso_1_code": null, "iso_3_code": "gea", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "238", + "scripts": [], + "own_tokenizer": false }, { "name": "Gera", "iso_1_code": null, "iso_3_code": "gew", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "239", + "scripts": [], + "own_tokenizer": false }, { "name": "Galambi", "iso_1_code": null, "iso_3_code": "glo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "240", + "scripts": [], + "own_tokenizer": false }, { "name": "Giiwo", "iso_1_code": null, "iso_3_code": "kks", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "241", + "scripts": [], + "own_tokenizer": false }, { "name": "Kubi", "iso_1_code": null, "iso_3_code": "kof", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "242", + "scripts": [], + "own_tokenizer": false }, { "name": "Kholok", "iso_1_code": null, "iso_3_code": "ktc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "243", + "scripts": [], + "own_tokenizer": false }, { "name": "Maaka", "iso_1_code": null, "iso_3_code": "mew", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "244", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngamo", "iso_1_code": null, "iso_3_code": "nbh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "245", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyam", "iso_1_code": null, "iso_3_code": "nmi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "246", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "232", + "scripts": [], + "own_tokenizer": false }, { "name": "Karekare", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karekare", "iso_1_code": null, "iso_3_code": "kai", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "248", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "247", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "231", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangale", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dera", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dera", "iso_1_code": null, "iso_3_code": "kna", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "250", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangale Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kutto", "iso_1_code": null, "iso_3_code": "kpa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "253", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwaami", "iso_1_code": null, "iso_3_code": "ksq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "254", + "scripts": [], + "own_tokenizer": false }, { "name": "Kushi", "iso_1_code": null, "iso_3_code": "kuh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "255", + "scripts": [], + "own_tokenizer": false }, { "name": "Pero", "iso_1_code": null, "iso_3_code": "pip", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "256", + "scripts": [], + "own_tokenizer": false }, { "name": "Piya-Kwonci", "iso_1_code": null, "iso_3_code": "piy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "257", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangale", "iso_1_code": null, "iso_3_code": "tan", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "258", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "230", + "scripts": [], + "own_tokenizer": false }, { "name": "A.3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Angas Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jakattoe", "iso_1_code": null, "iso_3_code": "jrt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "261", + "scripts": [], + "own_tokenizer": false }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngas", "iso_1_code": null, "iso_3_code": "anc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "263", + "scripts": [], + "own_tokenizer": false }, { "name": "Cakfem-Mushere", "iso_1_code": null, "iso_3_code": "cky", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "264", + "scripts": [], + "own_tokenizer": false }, { "name": "Belning", "iso_1_code": null, "iso_3_code": "glb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "265", + "scripts": [], + "own_tokenizer": false }, { "name": "Kofyar", "iso_1_code": null, "iso_3_code": "kwl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "266", + "scripts": [], + "own_tokenizer": false }, { "name": "Miship", "iso_1_code": null, "iso_3_code": "mjs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "267", + "scripts": [], + "own_tokenizer": false }, { "name": "Nteng", "iso_1_code": null, "iso_3_code": "nqt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "268", + "scripts": [], + "own_tokenizer": false }, { "name": "Mwaghavul", "iso_1_code": null, "iso_3_code": "sur", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "269", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "262", + "scripts": [], + "own_tokenizer": false }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Goemai", "iso_1_code": null, "iso_3_code": "ank", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "271", + "scripts": [], + "own_tokenizer": false }, { "name": "Koenoem", "iso_1_code": null, "iso_3_code": "kcs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "272", + "scripts": [], + "own_tokenizer": false }, { "name": "Tehl", "iso_1_code": null, "iso_3_code": "mtl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "273", + "scripts": [], + "own_tokenizer": false }, { "name": "Piapung", "iso_1_code": null, "iso_3_code": "pcw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "274", + "scripts": [], + "own_tokenizer": false }, { "name": "Tal", "iso_1_code": null, "iso_3_code": "tal", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "275", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "270", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "260", + "scripts": [], + "own_tokenizer": false }, { "name": "Yiwom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ywom", "iso_1_code": null, "iso_3_code": "gek", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "277", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "259", + "scripts": [], + "own_tokenizer": false }, { "name": "A.4", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fyer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fyer", "iso_1_code": null, "iso_3_code": "fie", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "280", + "scripts": [], + "own_tokenizer": false }, { "name": "Rom", "iso_1_code": null, "iso_3_code": "tdk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "279", + "scripts": [], + "own_tokenizer": false }, { "name": "Ron Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ron", "iso_1_code": null, "iso_3_code": "cla", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "283", + "scripts": [], + "own_tokenizer": false }, { "name": "Duhwa", "iso_1_code": null, "iso_3_code": "kbz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "284", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulere", "iso_1_code": null, "iso_3_code": "kul", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "285", + "scripts": [], + "own_tokenizer": false }, { "name": "Mindat", "iso_1_code": null, "iso_3_code": "mmf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "286", + "scripts": [], + "own_tokenizer": false }, { "name": "Sya", "iso_1_code": null, "iso_3_code": "scw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "287", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "282", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "278", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "226", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Teshenawa", "iso_1_code": null, "iso_3_code": "twc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "290", + "scripts": [], + "own_tokenizer": false }, { "name": "Bade Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bade", "iso_1_code": null, "iso_3_code": "bde", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "292", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngizim", "iso_1_code": null, "iso_3_code": "ngi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "293", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "291", + "scripts": [], + "own_tokenizer": false }, { "name": "Duwai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duwai", "iso_1_code": null, "iso_3_code": "dbp", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "294", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "289", + "scripts": [], + "own_tokenizer": false }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ajawa", "iso_1_code": null, "iso_3_code": "ajw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "297", + "scripts": [], + "own_tokenizer": false }, { "name": "Burku", "iso_1_code": null, "iso_3_code": "bbt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "298", + "scripts": [], + "own_tokenizer": false }, { "name": "Dirya", "iso_1_code": null, "iso_3_code": "dwa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "299", + "scripts": [], + "own_tokenizer": false }, { "name": "Zibinju", "iso_1_code": null, "iso_3_code": "jmb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "300", + "scripts": [], + "own_tokenizer": false }, { "name": "Kariya", "iso_1_code": null, "iso_3_code": "kil", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "301", + "scripts": [], + "own_tokenizer": false }, { "name": "Vune mi", "iso_1_code": null, "iso_3_code": "mkf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "302", + "scripts": [], + "own_tokenizer": false }, { "name": "Pa\u2019anci", "iso_1_code": null, "iso_3_code": "pqa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "303", + "scripts": [], + "own_tokenizer": false }, { "name": "Siri", "iso_1_code": null, "iso_3_code": "sir", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "304", + "scripts": [], + "own_tokenizer": false }, { "name": "Choogen", "iso_1_code": null, "iso_3_code": "tgd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "305", + "scripts": [], + "own_tokenizer": false }, { "name": "Warji", "iso_1_code": null, "iso_3_code": "wji", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "296", + "scripts": [], + "own_tokenizer": false }, { "name": "B.3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dass", "iso_1_code": null, "iso_3_code": "dot", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "308", + "scripts": [], + "own_tokenizer": false }, { "name": "Boghom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boghom", "iso_1_code": null, "iso_3_code": "bux", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "310", + "scripts": [], + "own_tokenizer": false }, { "name": "Kir-Balar", "iso_1_code": null, "iso_3_code": "kkr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "311", + "scripts": [], + "own_tokenizer": false }, { "name": "Mansi", "iso_1_code": null, "iso_3_code": "zns", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "312", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "309", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jimi", "iso_1_code": null, "iso_3_code": "jmi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "314", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "313", + "scripts": [], + "own_tokenizer": false }, { "name": "Guruntum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Guruntum-Mbaaru", "iso_1_code": null, "iso_3_code": "grd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "316", + "scripts": [], + "own_tokenizer": false }, { "name": "Juu", "iso_1_code": null, "iso_3_code": "juu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "317", + "scripts": [], + "own_tokenizer": false }, { "name": "Tala", "iso_1_code": null, "iso_3_code": "tak", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "318", + "scripts": [], + "own_tokenizer": false }, { "name": "Zamwal", "iso_1_code": null, "iso_3_code": "zah", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "315", + "scripts": [], + "own_tokenizer": false }, { "name": "Zaar Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cha\u2019ari", "iso_1_code": null, "iso_3_code": "cxh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "321", + "scripts": [], + "own_tokenizer": false }, { "name": "Dokshi", "iso_1_code": null, "iso_3_code": "dsk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "322", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyarim", "iso_1_code": null, "iso_3_code": "dyr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "323", + "scripts": [], + "own_tokenizer": false }, { "name": "Gyaazi", "iso_1_code": null, "iso_3_code": "gyz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "324", + "scripts": [], + "own_tokenizer": false }, { "name": "Luri", "iso_1_code": null, "iso_3_code": "ldd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "325", + "scripts": [], + "own_tokenizer": false }, { "name": "Dir-Nyamzak-Mbarimi", "iso_1_code": null, "iso_3_code": "nzr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "326", + "scripts": [], + "own_tokenizer": false }, { "name": "Pesse", "iso_1_code": null, "iso_3_code": "pze", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "327", + "scripts": [], + "own_tokenizer": false }, { "name": "Saya", "iso_1_code": null, "iso_3_code": "say", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "328", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tulai", "iso_1_code": null, "iso_3_code": "tvi", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "329", + "scripts": [], + "own_tokenizer": false }, { "name": "Buli", "iso_1_code": null, "iso_3_code": "uly", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "330", + "scripts": [], + "own_tokenizer": false }, { "name": "Zari", "iso_1_code": null, "iso_3_code": "zaz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "331", + "scripts": [], + "own_tokenizer": false }, { "name": "Bu", "iso_1_code": null, "iso_3_code": "zbu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "332", + "scripts": [], + "own_tokenizer": false }, { "name": "Zeem", "iso_1_code": null, "iso_3_code": "zem", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "333", + "scripts": [], + "own_tokenizer": false }, { "name": "Zul", "iso_1_code": null, "iso_3_code": "zlu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "334", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "320", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "307", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "288", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "225", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "56", + "scripts": [], + "own_tokenizer": false }, { "name": "Cushitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Xamtanga", "iso_1_code": null, "iso_3_code": "xan", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "338", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "337", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bilen", "iso_1_code": null, "iso_3_code": "byn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "340", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "339", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awngi", "iso_1_code": null, "iso_3_code": "awn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "342", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "341", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Qimant", "iso_1_code": null, "iso_3_code": "ahg", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "344", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "343", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "336", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boon", "iso_1_code": null, "iso_3_code": "bnl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "346", + "scripts": [], + "own_tokenizer": false }, { "name": "Dullay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ale", "iso_1_code": null, "iso_3_code": "gwd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "348", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsamai", "iso_1_code": null, "iso_3_code": "tsb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "349", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "347", + "scripts": [], + "own_tokenizer": false }, { "name": "Highland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Alaba-K\u2019abeena", "iso_1_code": null, "iso_3_code": "alw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "351", + "scripts": [], + "own_tokenizer": false }, { "name": "Burji", "iso_1_code": null, "iso_3_code": "bji", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "352", + "scripts": [], + "own_tokenizer": false }, { "name": "Gedeo", "iso_1_code": null, "iso_3_code": "drs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "353", + "scripts": [], + "own_tokenizer": false }, { "name": "Hadiyya", "iso_1_code": null, "iso_3_code": "hdy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "354", + "scripts": [], + "own_tokenizer": false }, { "name": "Kambaata", "iso_1_code": null, "iso_3_code": "ktb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "355", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false }, { "name": "Libido", "iso_1_code": null, "iso_3_code": "liq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "356", + "scripts": [], + "own_tokenizer": false }, { "name": "Sidamo", "iso_1_code": null, "iso_3_code": "sid", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "357", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "350", + "scripts": [], + "own_tokenizer": false }, { "name": "Konso-Gidole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mositacha", "iso_1_code": null, "iso_3_code": "dox", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "359", + "scripts": [], + "own_tokenizer": false }, { "name": "Dirasha", "iso_1_code": null, "iso_3_code": "gdl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "360", + "scripts": [], + "own_tokenizer": false }, { "name": "Konso", "iso_1_code": null, "iso_3_code": "kxc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "361", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false } - ] + ], + "node_i": "358", + "scripts": [], + "own_tokenizer": false }, { "name": "Oromo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oromo, Borana-Arsi-Guji", "iso_1_code": "om", "iso_3_code": "gax", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "363", + "scripts": [], + "own_tokenizer": false }, { "name": "Oromo, West Central", "iso_1_code": "om", "iso_3_code": "gaz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "364", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Oromo, Eastern", "iso_1_code": "om", "iso_3_code": "hae", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "365", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Orma", "iso_1_code": "om", "iso_3_code": "orc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "366", + "scripts": [], + "own_tokenizer": false }, { "name": "Waata", "iso_1_code": null, "iso_3_code": "ssn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "367", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "362", + "scripts": [], + "own_tokenizer": false }, { "name": "Rendille-Boni", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aweer", "iso_1_code": null, "iso_3_code": "bob", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "369", + "scripts": [], + "own_tokenizer": false }, { "name": "Rendille", "iso_1_code": null, "iso_3_code": "rel", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "370", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "368", + "scripts": [], + "own_tokenizer": false }, { "name": "Saho-Afar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Afar", "iso_1_code": "aa", "iso_3_code": "aar", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "372", + "scripts": [], + "own_tokenizer": false }, { "name": "Saho", "iso_1_code": null, "iso_3_code": "ssy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "373", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "371", + "scripts": [], + "own_tokenizer": false }, { "name": "Somali", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dabarre", "iso_1_code": null, "iso_3_code": "dbr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "375", + "scripts": [], + "own_tokenizer": false }, { "name": "Garre", "iso_1_code": null, "iso_3_code": "gex", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "376", + "scripts": [], + "own_tokenizer": false }, { "name": "Girirra", "iso_1_code": null, "iso_3_code": "gii", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "377", + "scripts": [], + "own_tokenizer": false }, { "name": "Jiiddu", "iso_1_code": null, "iso_3_code": "jii", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "378", + "scripts": [], + "own_tokenizer": false }, { "name": "Somali", "iso_1_code": "so", "iso_3_code": "som", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "379", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tunni", "iso_1_code": null, "iso_3_code": "tqq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "380", + "scripts": [], + "own_tokenizer": false }, { "name": "Maay", "iso_1_code": null, "iso_3_code": "ymm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "374", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Omo-Tana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arbore", "iso_1_code": null, "iso_3_code": "arv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "383", + "scripts": [], + "own_tokenizer": false }, { "name": "Baiso", "iso_1_code": null, "iso_3_code": "bsw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "384", + "scripts": [], + "own_tokenizer": false }, { "name": "Daasanach", "iso_1_code": null, "iso_3_code": "dsh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "385", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "El Molo", "iso_1_code": null, "iso_3_code": "elo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "386", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "382", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaaku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yaaku", "iso_1_code": null, "iso_3_code": "muu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "387", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "345", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bedawiyet", "iso_1_code": null, "iso_3_code": "bej", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "390", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "389", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aas\u00e1x", "iso_1_code": null, "iso_3_code": "aas", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "392", + "scripts": [], + "own_tokenizer": false }, { "name": "Burunge", "iso_1_code": null, "iso_3_code": "bds", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "393", + "scripts": [], + "own_tokenizer": false }, { "name": "Dahalo", "iso_1_code": null, "iso_3_code": "dal", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "394", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorowa", "iso_1_code": null, "iso_3_code": "gow", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "395", + "scripts": [], + "own_tokenizer": false }, { "name": "Iraqw", "iso_1_code": null, "iso_3_code": "irk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "396", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Alagwa", "iso_1_code": null, "iso_3_code": "wbj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "397", + "scripts": [], + "own_tokenizer": false }, { "name": "Kw\u2019adza", "iso_1_code": null, "iso_3_code": "wka", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "391", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "335", + "scripts": [], + "own_tokenizer": false }, { "name": "Egyptian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "coptic", - "tokenizer": "StanzaTokenizer(\"cop\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Coptic", "iso_1_code": null, "iso_3_code": "cop", - "tokenizer": { - "name": "coptic", - "tokenizer": "StanzaTokenizer(\"cop\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "400", + "scripts": [ + "Copt" + ], + "own_tokenizer": true } - ] + ], + "node_i": "399", + "scripts": [], + "own_tokenizer": false }, { "name": "Omotic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dizoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dizin", "iso_1_code": null, "iso_3_code": "mdx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "404", + "scripts": [], + "own_tokenizer": false }, { "name": "Nayi", "iso_1_code": null, "iso_3_code": "noz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "405", + "scripts": [], + "own_tokenizer": false }, { "name": "Sheko", "iso_1_code": null, "iso_3_code": "she", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "406", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "403", + "scripts": [], + "own_tokenizer": false }, { "name": "Gonga-Gimojan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gimojan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Janjero", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yemsa", "iso_1_code": null, "iso_3_code": "jnj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "410", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "409", + "scripts": [], + "own_tokenizer": false }, { "name": "Ometo-Gimira", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tsaara", "iso_1_code": null, "iso_3_code": "cra", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "412", + "scripts": [], + "own_tokenizer": false }, { "name": "Gimira", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bench", "iso_1_code": null, "iso_3_code": "bcq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "415", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "414", + "scripts": [], + "own_tokenizer": false }, { "name": "Ometo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Male", "iso_1_code": null, "iso_3_code": "mdy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "417", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dorze", "iso_1_code": null, "iso_3_code": "doz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "419", + "scripts": [], + "own_tokenizer": false }, { "name": "Dawro", "iso_1_code": null, "iso_3_code": "dwr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "420", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gamo", "iso_1_code": null, "iso_3_code": "gmv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "421", + "scripts": [ + "Latn", + "Ethi" + ], + "own_tokenizer": false }, { "name": "Gofa", "iso_1_code": null, "iso_3_code": "gof", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "422", + "scripts": [ + "Latn", + "Ethi" + ], + "own_tokenizer": false }, { "name": "Melo", "iso_1_code": null, "iso_3_code": "mfx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "423", + "scripts": [], + "own_tokenizer": false }, { "name": "Oyda", "iso_1_code": null, "iso_3_code": "oyd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "424", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolaytta", "iso_1_code": null, "iso_3_code": "wal", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "425", + "scripts": [ + "Latn", + "Ethi" + ], + "own_tokenizer": false } - ] + ], + "node_i": "418", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kachama-Ganjule", "iso_1_code": null, "iso_3_code": "kcx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "427", + "scripts": [], + "own_tokenizer": false }, { "name": "Koorete", "iso_1_code": null, "iso_3_code": "kqy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "428", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false }, { "name": "Zayse", "iso_1_code": null, "iso_3_code": "zay", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "429", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "426", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Basketo", "iso_1_code": null, "iso_3_code": "bst", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "431", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "416", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "411", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "408", + "scripts": [], + "own_tokenizer": false }, { "name": "Gonga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Anfillo", "iso_1_code": null, "iso_3_code": "myo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "434", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "433", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Borna", "iso_1_code": null, "iso_3_code": "bwo", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "436", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "435", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kafa", "iso_1_code": null, "iso_3_code": "kbr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "438", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Shekkacho", "iso_1_code": null, "iso_3_code": "moy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "439", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "437", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "407", + "scripts": [], + "own_tokenizer": false }, { "name": "Mao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ganza", "iso_1_code": null, "iso_3_code": "gza", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "441", + "scripts": [], + "own_tokenizer": false }, { "name": "Hozo", "iso_1_code": null, "iso_3_code": "hoz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "442", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawes Aasse", "iso_1_code": null, "iso_3_code": "myf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "443", + "scripts": [], + "own_tokenizer": false }, { "name": "Seze", "iso_1_code": null, "iso_3_code": "sze", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "444", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "402", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aari", "iso_1_code": null, "iso_3_code": "aiw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "446", + "scripts": [], + "own_tokenizer": false }, { "name": "Hamer-Banna", "iso_1_code": null, "iso_3_code": "amf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "447", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dime", "iso_1_code": null, "iso_3_code": "dim", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "448", + "scripts": [], + "own_tokenizer": false }, { "name": "Gayil", "iso_1_code": null, "iso_3_code": "gyl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "449", + "scripts": [], + "own_tokenizer": false }, { "name": "Karo", "iso_1_code": null, "iso_3_code": "kxh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "450", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "445", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "401", + "scripts": [], + "own_tokenizer": false }, { "name": "Semitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Faifi", "iso_1_code": null, "iso_3_code": "fif", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "453", + "scripts": [], + "own_tokenizer": false }, { "name": "R\u0101zi\u1e25\u012b", "iso_1_code": null, "iso_3_code": "rzh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "454", + "scripts": [], + "own_tokenizer": false }, { "name": "Aramaic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Syriac", "iso_1_code": null, "iso_3_code": "syc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "457", + "scripts": [ + "Syrc" + ], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Assyrian Neo-Aramaic", "iso_1_code": null, "iso_3_code": "aii", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "460", + "scripts": [ + "Syrc" + ], + "own_tokenizer": false }, { "name": "Bohtan Neo-Aramaic", "iso_1_code": null, "iso_3_code": "bhn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "461", + "scripts": [], + "own_tokenizer": false }, { "name": "Barzani-Sandu Jewish Neo-Aramaic", "iso_1_code": null, "iso_3_code": "bjf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "462", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaldean Neo-Aramaic", "iso_1_code": null, "iso_3_code": "cld", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "463", + "scripts": [], + "own_tokenizer": false }, { "name": "H\u00e9rtevin", "iso_1_code": null, "iso_3_code": "hrt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "464", + "scripts": [], + "own_tokenizer": false }, { "name": "Koy Sanjaq Surat", "iso_1_code": null, "iso_3_code": "kqd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "465", + "scripts": [], + "own_tokenizer": false }, { "name": "Senaya", "iso_1_code": null, "iso_3_code": "syn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "466", + "scripts": [], + "own_tokenizer": false }, { "name": "Jewish Babylonian Aramaic", "iso_1_code": null, "iso_3_code": "tmr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "467", + "scripts": [], + "own_tokenizer": false }, { "name": "Trans-Zab", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Inter-Zab Jewish Neo-Aramaic", "iso_1_code": null, "iso_3_code": "aij", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "469", + "scripts": [], + "own_tokenizer": false }, { "name": "Hulaul\u00e1", "iso_1_code": null, "iso_3_code": "huy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "470", + "scripts": [], + "own_tokenizer": false }, { "name": "Lishana Deni", "iso_1_code": null, "iso_3_code": "lsd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "471", + "scripts": [], + "own_tokenizer": false }, { "name": "Lish\u00e1n Noshan", "iso_1_code": null, "iso_3_code": "trg", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "472", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "468", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "459", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mlahs\u00f6", "iso_1_code": null, "iso_3_code": "lhs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "474", + "scripts": [], + "own_tokenizer": false }, { "name": "Turoyo", "iso_1_code": null, "iso_3_code": "tru", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "475", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "458", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandaic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Neo-Mandaic", "iso_1_code": null, "iso_3_code": "mid", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "477", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandaic, Classical", "iso_1_code": null, "iso_3_code": "myz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "478", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "476", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "456", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Western Neo-Aramaic", "iso_1_code": null, "iso_3_code": "amw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "480", + "scripts": [], + "own_tokenizer": false }, { "name": "Samaritan Aramaic", "iso_1_code": null, "iso_3_code": "sam", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "481", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "455", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Arabic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Arabic, Algerian Saharan", "iso_1_code": "ar", "iso_3_code": "aao", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "484", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Tajiki", "iso_1_code": "ar", "iso_3_code": "abh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "485", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Baharna", "iso_1_code": "ar", "iso_3_code": "abv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "486", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Mesopotamian", "iso_1_code": "ar", "iso_3_code": "acm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "487", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Ta\u2019izzi-Adeni", "iso_1_code": "ar", "iso_3_code": "acq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "488", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Hijazi", "iso_1_code": "ar", "iso_3_code": "acw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "489", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Omani", "iso_1_code": "ar", "iso_3_code": "acx", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "490", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Cypriot", "iso_1_code": "ar", "iso_3_code": "acy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "491", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Dhofari", "iso_1_code": "ar", "iso_3_code": "adf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "492", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Tunisian", "iso_1_code": "ar", "iso_3_code": "aeb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "493", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Sa\u2019idi", "iso_1_code": "ar", "iso_3_code": "aec", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "494", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Gulf", "iso_1_code": "ar", "iso_3_code": "afb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "495", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Judeo-Moroccan", "iso_1_code": null, "iso_3_code": "aju", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "496", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabic, Levantine", "iso_1_code": "ar", "iso_3_code": "apc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "497", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Sudanese", "iso_1_code": "ar", "iso_3_code": "apd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "498", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Standard", "iso_1_code": "ar", "iso_3_code": "arb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "499", + "scripts": [ + "Arab", + "Latn" + ], + "own_tokenizer": true }, { "name": "Arabic, Algerian", "iso_1_code": "ar", "iso_3_code": "arq", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "500", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Najdi", "iso_1_code": "ar", "iso_3_code": "ars", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "501", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Moroccan", "iso_1_code": "ar", "iso_3_code": "ary", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "502", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Egyptian", "iso_1_code": "ar", "iso_3_code": "arz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "503", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Uzbeki", "iso_1_code": "ar", "iso_3_code": "auz", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "504", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Eastern Egyptian Bedawi", "iso_1_code": "ar", "iso_3_code": "avl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "505", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Hadrami", "iso_1_code": "ar", "iso_3_code": "ayh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "506", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Libyan", "iso_1_code": "ar", "iso_3_code": "ayl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "507", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Sanaani", "iso_1_code": "ar", "iso_3_code": "ayn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "508", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, North Mesopotamian", "iso_1_code": "ar", "iso_3_code": "ayp", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "509", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Judeo-Yemeni", "iso_1_code": null, "iso_3_code": "jye", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "510", + "scripts": [], + "own_tokenizer": false }, { "name": "Hassaniyya", "iso_1_code": null, "iso_3_code": "mey", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "511", + "scripts": [], + "own_tokenizer": false }, { "name": "Maltese", "iso_1_code": "mt", "iso_3_code": "mlt", - "tokenizer": { - "name": "maltese", - "tokenizer": "StanzaTokenizer(\"mt\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "512", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Arabic, Chadian", "iso_1_code": "ar", "iso_3_code": "shu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "513", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Arabic, Shihhi", "iso_1_code": "ar", "iso_3_code": "ssh", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "514", + "scripts": [], + "own_tokenizer": true }, { "name": "Arabic, Judeo-Iraqi", "iso_1_code": null, "iso_3_code": "yhd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "515", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabic, Judeo-Tripolitanian", "iso_1_code": null, "iso_3_code": "yud", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "516", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "483", + "scripts": [], + "own_tokenizer": false }, { "name": "Canaanite", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hebrew", - "tokenizer": "SpaCyTokenizer(\"he\")" + "tokenizers": { + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Hebrew, Ancient", "iso_1_code": null, "iso_3_code": "hbo", - "tokenizer": { - "name": "ancient_hebrew", - "tokenizer": "StanzaTokenizer(\"hbo\")" + "tokenizers": { + "Hebr": { + "full_object": "StanzaTokenizer(\"hbo\")", + "original_lang_name": "ancient_hebrew", + "original_lang_code": "hbo", + "scripts": [ + "Hebr" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "518", + "scripts": [ + "Hebr" + ], + "own_tokenizer": true }, { "name": "Hebrew", "iso_1_code": "he", "iso_3_code": "heb", - "tokenizer": { - "name": "hebrew", - "tokenizer": "SpaCyTokenizer(\"he\")" + "tokenizers": { + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "519", + "scripts": [ + "Hebr" + ], + "own_tokenizer": true }, { "name": "Samaritan Hebrew", "iso_1_code": null, "iso_3_code": "smp", - "tokenizer": { - "name": "hebrew", - "tokenizer": "SpaCyTokenizer(\"he\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "520", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "482", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "452", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ethiopian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dahalik", "iso_1_code": null, "iso_3_code": "dlk", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "524", + "scripts": [], + "own_tokenizer": false }, { "name": "Geez", "iso_1_code": null, "iso_3_code": "gez", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "525", + "scripts": [], + "own_tokenizer": false }, { "name": "Tigr\u00e9", "iso_1_code": null, "iso_3_code": "tig", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "526", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false }, { "name": "Tigrigna", "iso_1_code": "ti", "iso_3_code": "tir", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "527", + "scripts": [ + "Ethi" + ], + "own_tokenizer": true } - ] + ], + "node_i": "523", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Outer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "n-Group", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gafat", "iso_1_code": null, "iso_3_code": "gft", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "531", + "scripts": [], + "own_tokenizer": false }, { "name": "Kistane", "iso_1_code": null, "iso_3_code": "gru", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "532", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "530", + "scripts": [], + "own_tokenizer": false }, { "name": "tt-Group", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Inor", "iso_1_code": null, "iso_3_code": "ior", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "534", + "scripts": [], + "own_tokenizer": false }, { "name": "Mesqan", "iso_1_code": null, "iso_3_code": "mvz", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "535", + "scripts": [], + "own_tokenizer": false }, { "name": "Mesmes", "iso_1_code": null, "iso_3_code": "mys", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "536", + "scripts": [], + "own_tokenizer": false }, { "name": "Sebat Bet Gurage", "iso_1_code": null, "iso_3_code": "sgw", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "537", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false } - ] + ], + "node_i": "533", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "529", + "scripts": [], + "own_tokenizer": false }, { "name": "Transversal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Amharic-Argobba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Argobba", "iso_1_code": null, "iso_3_code": "agj", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "540", + "scripts": [], + "own_tokenizer": false }, { "name": "Amharic", "iso_1_code": "am", "iso_3_code": "amh", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "541", + "scripts": [ + "Ethi" + ], + "own_tokenizer": true } - ] + ], + "node_i": "539", + "scripts": [], + "own_tokenizer": false }, { "name": "Harari-East Gurage", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Harari", "iso_1_code": null, "iso_3_code": "har", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "543", + "scripts": [], + "own_tokenizer": false }, { "name": "Silt\u2019e", "iso_1_code": null, "iso_3_code": "stv", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "544", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolane", "iso_1_code": null, "iso_3_code": "wle", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "545", + "scripts": [], + "own_tokenizer": false }, { "name": "Zay", "iso_1_code": null, "iso_3_code": "zwa", - "tokenizer": { - "name": "amharic", - "tokenizer": "SpaCyTokenizer(\"am\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "546", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "542", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "538", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "528", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "522", + "scripts": [], + "own_tokenizer": false }, { "name": "South Arabian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bathari", "iso_1_code": null, "iso_3_code": "bhm", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "548", + "scripts": [], + "own_tokenizer": false }, { "name": "Mehri", "iso_1_code": null, "iso_3_code": "gdq", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "549", + "scripts": [], + "own_tokenizer": false }, { "name": "Hoby\u00f3t", "iso_1_code": null, "iso_3_code": "hoh", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "550", + "scripts": [], + "own_tokenizer": false }, { "name": "Harsusi", "iso_1_code": null, "iso_3_code": "hss", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "551", + "scripts": [], + "own_tokenizer": false }, { "name": "Shehri", "iso_1_code": null, "iso_3_code": "shv", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "552", + "scripts": [], + "own_tokenizer": false }, { "name": "Soqotri", "iso_1_code": null, "iso_3_code": "sqt", - "tokenizer": { - "name": "tigrinya", - "tokenizer": "SpaCyTokenizer(\"ti\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "553", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "547", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "451", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "scripts": [ + "Copt" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "scripts": [ + "Hebr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "scripts": [ + "Ethi" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ongota", "iso_1_code": null, "iso_3_code": "bxe", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "555", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "554", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Algic.json b/data/Algic.json index 34b987f575dca9cd1d9750f266d59ecb5c2ac3ca..d211f14007af7dd424db5852b2c158d363f1dcd0 100644 --- a/data/Algic.json +++ b/data/Algic.json @@ -2,482 +2,630 @@ "name": "Algic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Algonquian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Blackfoot", "iso_1_code": null, "iso_3_code": "bla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "558", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cheyenne", "iso_1_code": null, "iso_3_code": "chy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "559", + "scripts": [], + "own_tokenizer": false }, { "name": "Menominee", "iso_1_code": null, "iso_3_code": "mez", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "560", + "scripts": [], + "own_tokenizer": false }, { "name": "Miami", "iso_1_code": null, "iso_3_code": "mia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "561", + "scripts": [], + "own_tokenizer": false }, { "name": "Nawathinehena", "iso_1_code": null, "iso_3_code": "nwa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "562", + "scripts": [], + "own_tokenizer": false }, { "name": "Shawnee", "iso_1_code": null, "iso_3_code": "sjw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "563", + "scripts": [], + "own_tokenizer": false }, { "name": "Arapaho", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arapaho", "iso_1_code": null, "iso_3_code": "arp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "565", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gros Ventre", "iso_1_code": null, "iso_3_code": "ats", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "566", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "564", + "scripts": [], + "own_tokenizer": false }, { "name": "Cree-Montagnais", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Atikamekw", "iso_1_code": null, "iso_3_code": "atj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "568", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cree, Southern East", "iso_1_code": "cr", "iso_3_code": "crj", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "569", + "scripts": [ + "Cans" + ], + "own_tokenizer": false }, { "name": "Cree, Plains", "iso_1_code": "cr", "iso_3_code": "crk", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "570", + "scripts": [ + "Latn", + "Cans" + ], + "own_tokenizer": false }, { "name": "Cree, Northern East", "iso_1_code": "cr", "iso_3_code": "crl", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "571", + "scripts": [ + "Cans" + ], + "own_tokenizer": false }, { "name": "Cree, Moose", "iso_1_code": "cr", "iso_3_code": "crm", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "572", + "scripts": [ + "Cans" + ], + "own_tokenizer": false }, { "name": "Cree, Swampy", "iso_1_code": "cr", "iso_3_code": "csw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "573", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cree, Woods", "iso_1_code": "cr", "iso_3_code": "cwd", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "574", + "scripts": [ + "Cans" + ], + "own_tokenizer": false }, { "name": "Innu", "iso_1_code": null, "iso_3_code": "moe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "575", + "scripts": [], + "own_tokenizer": false }, { "name": "Naskapi", "iso_1_code": null, "iso_3_code": "nsk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "576", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "567", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Algonquian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Carolina Algonquian", "iso_1_code": null, "iso_3_code": "crr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "578", + "scripts": [], + "own_tokenizer": false }, { "name": "Etchemin", "iso_1_code": null, "iso_3_code": "etc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "579", + "scripts": [], + "own_tokenizer": false }, { "name": "Mi\u2019kmaq", "iso_1_code": null, "iso_3_code": "mic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "580", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Powhatan", "iso_1_code": null, "iso_3_code": "pim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "581", + "scripts": [], + "own_tokenizer": false }, { "name": "Malecite-Passamaquoddy", "iso_1_code": null, "iso_3_code": "pqm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "582", + "scripts": [], + "own_tokenizer": false }, { "name": "Quiripi", "iso_1_code": null, "iso_3_code": "qyp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "583", + "scripts": [], + "own_tokenizer": false }, { "name": "Wampanoag", "iso_1_code": null, "iso_3_code": "wam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "584", + "scripts": [], + "own_tokenizer": false }, { "name": "Loup B", "iso_1_code": null, "iso_3_code": "xlb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "585", + "scripts": [], + "own_tokenizer": false }, { "name": "Loup A", "iso_1_code": null, "iso_3_code": "xlo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "586", + "scripts": [], + "own_tokenizer": false }, { "name": "Narragansett", "iso_1_code": null, "iso_3_code": "xnt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "587", + "scripts": [], + "own_tokenizer": false }, { "name": "Mohegan-Pequot", "iso_1_code": null, "iso_3_code": "xpq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "588", + "scripts": [], + "own_tokenizer": false }, { "name": "Abenaki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abenaki, Eastern", "iso_1_code": null, "iso_3_code": "aaq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "590", + "scripts": [], + "own_tokenizer": false }, { "name": "Abenaki, Western", "iso_1_code": null, "iso_3_code": "abe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "589", + "scripts": [], + "own_tokenizer": false }, { "name": "Delaware", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mahican", "iso_1_code": null, "iso_3_code": "mjy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "593", + "scripts": [], + "own_tokenizer": false }, { "name": "Munsee", "iso_1_code": null, "iso_3_code": "umu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "594", + "scripts": [], + "own_tokenizer": false }, { "name": "Unami", "iso_1_code": null, "iso_3_code": "unm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "595", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "592", + "scripts": [], + "own_tokenizer": false }, { "name": "Nanticoke-Conoy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nanticoke", "iso_1_code": null, "iso_3_code": "nnt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "597", + "scripts": [], + "own_tokenizer": false }, { "name": "Piscataway", "iso_1_code": null, "iso_3_code": "psy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "598", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "577", + "scripts": [], + "own_tokenizer": false }, { "name": "Fox", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kickapoo", "iso_1_code": null, "iso_3_code": "kic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "600", + "scripts": [], + "own_tokenizer": false }, { "name": "Meskwaki", "iso_1_code": null, "iso_3_code": "sac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "601", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "599", + "scripts": [], + "own_tokenizer": false }, { "name": "Ojibwa-Potawatomi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Algonquin", "iso_1_code": null, "iso_3_code": "alq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "603", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chippewa", "iso_1_code": "oj", "iso_3_code": "ciw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "604", + "scripts": [], + "own_tokenizer": false }, { "name": "Ojibwa, Northwestern", "iso_1_code": "oj", "iso_3_code": "ojb", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "605", + "scripts": [ + "Latn", + "Cans" + ], + "own_tokenizer": false }, { "name": "Ojibwa, Central", "iso_1_code": "oj", "iso_3_code": "ojc", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "606", + "scripts": [], + "own_tokenizer": false }, { "name": "Ojibwa, Eastern", "iso_1_code": "oj", "iso_3_code": "ojg", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "607", + "scripts": [], + "own_tokenizer": false }, { "name": "Oji-Cree", "iso_1_code": "oj", "iso_3_code": "ojs", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "608", + "scripts": [], + "own_tokenizer": false }, { "name": "Ojibwa, Western", "iso_1_code": "oj", "iso_3_code": "ojw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "609", + "scripts": [], + "own_tokenizer": false }, { "name": "Ottawa", "iso_1_code": "oj", "iso_3_code": "otw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "610", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Potawatomi", "iso_1_code": null, "iso_3_code": "pot", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "611", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "602", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "557", + "scripts": [], + "own_tokenizer": false }, { "name": "Ritwan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wiyot", "iso_1_code": null, "iso_3_code": "wiy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "613", + "scripts": [], + "own_tokenizer": false }, { "name": "Yurok", "iso_1_code": null, "iso_3_code": "yur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "614", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "612", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "556", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Amto-Musan.json b/data/Amto-Musan.json index 177ae7dc10115154c89672932fd2b9fadcda4301..92566c1ba7029cbfb5c9bbc20cc8ee89d46f96d0 100644 --- a/data/Amto-Musan.json +++ b/data/Amto-Musan.json @@ -2,24 +2,30 @@ "name": "Amto-Musan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amto", "iso_1_code": null, "iso_3_code": "amt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "616", + "scripts": [], + "own_tokenizer": false }, { "name": "Siawi", "iso_1_code": null, "iso_3_code": "mmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "617", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "615", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Andamanese.json b/data/Andamanese.json index 16ea029c5648858b7a5f6c9c771e2033509f9616..e85ec8b4e58308caa73f52a6ab002de1cdb6bb31 100644 --- a/data/Andamanese.json +++ b/data/Andamanese.json @@ -2,156 +2,194 @@ "name": "Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Great Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Great Andamanese, Mixed", "iso_1_code": null, "iso_3_code": "gac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "620", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aka-Bea", "iso_1_code": null, "iso_3_code": "abj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "622", + "scripts": [], + "own_tokenizer": false }, { "name": "Akar-Bale", "iso_1_code": null, "iso_3_code": "acl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "623", + "scripts": [], + "own_tokenizer": false }, { "name": "Aka-Kede", "iso_1_code": null, "iso_3_code": "akx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "624", + "scripts": [], + "own_tokenizer": false }, { "name": "Aka-Kol", "iso_1_code": null, "iso_3_code": "aky", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "625", + "scripts": [], + "own_tokenizer": false }, { "name": "A-Pucikwar", "iso_1_code": null, "iso_3_code": "apq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "626", + "scripts": [], + "own_tokenizer": false }, { "name": "Oko-Juwoi", "iso_1_code": null, "iso_3_code": "okj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "627", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "621", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aka-Cari", "iso_1_code": null, "iso_3_code": "aci", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "629", + "scripts": [], + "own_tokenizer": false }, { "name": "Aka-Kora", "iso_1_code": null, "iso_3_code": "ack", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "630", + "scripts": [], + "own_tokenizer": false }, { "name": "Aka-Jeru", "iso_1_code": null, "iso_3_code": "akj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "631", + "scripts": [], + "own_tokenizer": false }, { "name": "Aka-Bo", "iso_1_code": null, "iso_3_code": "akm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "632", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "628", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "619", + "scripts": [], + "own_tokenizer": false }, { "name": "South Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jarawa", "iso_1_code": null, "iso_3_code": "anq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "634", + "scripts": [], + "own_tokenizer": false }, { "name": "\u00d6\u00f1ge", "iso_1_code": null, "iso_3_code": "oon", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "635", + "scripts": [], + "own_tokenizer": false }, { "name": "Sentinel", "iso_1_code": null, "iso_3_code": "std", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "636", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "633", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "618", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Arafundi.json b/data/Arafundi.json index 24254697f4815facc2726a0d1a26a789b7004489..fcb0598f3748c47ac82f422e8eab550afddac102 100644 --- a/data/Arafundi.json +++ b/data/Arafundi.json @@ -2,32 +2,40 @@ "name": "Arafundi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andai", "iso_1_code": null, "iso_3_code": "afd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "638", + "scripts": [], + "own_tokenizer": false }, { "name": "Nanubae", "iso_1_code": null, "iso_3_code": "afk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "639", + "scripts": [], + "own_tokenizer": false }, { "name": "Tapei", "iso_1_code": null, "iso_3_code": "afp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "637", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Arai (Left May).json b/data/Arai (Left May).json index 1d1be7a8209bce6685ffe7bfde35892571f1c995..ef0f39864c0bc3f729bb494b0ce828e33dc2c878 100644 --- a/data/Arai (Left May).json +++ b/data/Arai (Left May).json @@ -2,56 +2,72 @@ "name": "Arai (Left May)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sawiyanu", "iso_1_code": null, "iso_3_code": "amm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "642", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bo", "iso_1_code": null, "iso_3_code": "bpw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "643", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawuno Teneyo", "iso_1_code": null, "iso_3_code": "itr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "644", + "scripts": [], + "own_tokenizer": false }, { "name": "Nakwi", "iso_1_code": null, "iso_3_code": "nax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "645", + "scripts": [], + "own_tokenizer": false }, { "name": "Nimo", "iso_1_code": null, "iso_3_code": "niw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "646", + "scripts": [], + "own_tokenizer": false }, { "name": "Owiniga", "iso_1_code": null, "iso_3_code": "owi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "647", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "641", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Arauan.json b/data/Arauan.json index 5ad4d764fa7f035389bb113cb1ea570de3055a10..662983b7dc3928d2b2c907ca9a8a4251340010be 100644 --- a/data/Arauan.json +++ b/data/Arauan.json @@ -2,65 +2,87 @@ "name": "Arauan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": "aru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "649", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulina", "iso_1_code": null, "iso_3_code": "cul", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "650", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Den\u00ed", "iso_1_code": null, "iso_3_code": "dny", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "651", + "scripts": [], + "own_tokenizer": false }, { "name": "Paumar\u00ed", "iso_1_code": null, "iso_3_code": "pad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "652", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Suruah\u00e1", "iso_1_code": null, "iso_3_code": "swx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "653", + "scripts": [], + "own_tokenizer": false }, { "name": "Jamamadi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jamamad\u00ed", "iso_1_code": null, "iso_3_code": "jaa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "655", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "654", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "648", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Australian.json b/data/Australian.json index 6f21ca5af5a4442f019c18309ac3674e901dd24d..cd3a678d44d65d56419a1c41bb3556462dc1969e 100644 --- a/data/Australian.json +++ b/data/Australian.json @@ -2,4178 +2,5230 @@ "name": "Australian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lanima", "iso_1_code": null, "iso_3_code": "lnw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "657", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunaban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bunuba", "iso_1_code": null, "iso_3_code": "bck", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "659", + "scripts": [], + "own_tokenizer": false }, { "name": "Gooniyandi", "iso_1_code": null, "iso_3_code": "gni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "660", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "658", + "scripts": [], + "own_tokenizer": false }, { "name": "Daly", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bringen-Wagaydy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bringen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Marrithiyel", "iso_1_code": null, "iso_3_code": "mfr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "664", + "scripts": [], + "own_tokenizer": false }, { "name": "Maridan", "iso_1_code": null, "iso_3_code": "zmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "665", + "scripts": [], + "own_tokenizer": false }, { "name": "Marti Ke", "iso_1_code": null, "iso_3_code": "zmg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "666", + "scripts": [], + "own_tokenizer": false }, { "name": "Maridjabin", "iso_1_code": null, "iso_3_code": "zmj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "667", + "scripts": [], + "own_tokenizer": false }, { "name": "Marimanindji", "iso_1_code": null, "iso_3_code": "zmm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "668", + "scripts": [], + "own_tokenizer": false }, { "name": "Maringarr", "iso_1_code": null, "iso_3_code": "zmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "669", + "scripts": [], + "own_tokenizer": false }, { "name": "Mariyedi", "iso_1_code": null, "iso_3_code": "zmy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "670", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "663", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagaydy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ami", "iso_1_code": null, "iso_3_code": "amy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "672", + "scripts": [], + "own_tokenizer": false }, { "name": "Giyug", "iso_1_code": null, "iso_3_code": "giy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "673", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadjiginy", "iso_1_code": null, "iso_3_code": "wdj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "674", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda", "iso_1_code": null, "iso_3_code": "zma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "675", + "scripts": [], + "own_tokenizer": false }, { "name": "Maranunggu", "iso_1_code": null, "iso_3_code": "zmr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "676", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "671", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "662", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagmalag", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Daly Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kamu", "iso_1_code": null, "iso_3_code": "xmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "679", + "scripts": [], + "own_tokenizer": false }, { "name": "Matngala", "iso_1_code": null, "iso_3_code": "zml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "680", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "678", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagmalag Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Malak Malak", "iso_1_code": null, "iso_3_code": "mpb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "682", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuwema", "iso_1_code": null, "iso_3_code": "woa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "683", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "681", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "677", + "scripts": [], + "own_tokenizer": false }, { "name": "Marriammu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Marriammu", "iso_1_code": null, "iso_3_code": "xru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "685", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "684", + "scripts": [], + "own_tokenizer": false }, { "name": "Murrinh-Patha", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Murrinh-Patha", "iso_1_code": null, "iso_3_code": "mwf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "687", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngan\u2019gityemerri", "iso_1_code": null, "iso_3_code": "nam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "688", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "686", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "661", + "scripts": [], + "own_tokenizer": false }, { "name": "Djamindjungan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djamindjung", "iso_1_code": null, "iso_3_code": "djd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "690", + "scripts": [], + "own_tokenizer": false }, { "name": "Nungali", "iso_1_code": null, "iso_3_code": "nug", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "691", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "689", + "scripts": [], + "own_tokenizer": false }, { "name": "Djeragan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gadjerawang", "iso_1_code": null, "iso_3_code": "gdh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "693", + "scripts": [], + "own_tokenizer": false }, { "name": "Kija", "iso_1_code": null, "iso_3_code": "gia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "694", + "scripts": [], + "own_tokenizer": false }, { "name": "Miriwoong", "iso_1_code": null, "iso_3_code": "mep", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "695", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "692", + "scripts": [], + "own_tokenizer": false }, { "name": "Giimbiyu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Erre", "iso_1_code": null, "iso_3_code": "err", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "697", + "scripts": [], + "own_tokenizer": false }, { "name": "Urningangg", "iso_1_code": null, "iso_3_code": "urc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "698", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangerr", "iso_1_code": null, "iso_3_code": "zme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "699", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "696", + "scripts": [], + "own_tokenizer": false }, { "name": "Gunwingguan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burarran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burarra", "iso_1_code": null, "iso_3_code": "bvr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "702", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Djeebbana", "iso_1_code": null, "iso_3_code": "djj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "703", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurr-goni", "iso_1_code": null, "iso_3_code": "gge", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "704", + "scripts": [], + "own_tokenizer": false }, { "name": "Na-kara", "iso_1_code": null, "iso_3_code": "nck", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "705", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "701", + "scripts": [], + "own_tokenizer": false }, { "name": "Djauanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djauan", "iso_1_code": null, "iso_3_code": "djn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "707", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "706", + "scripts": [], + "own_tokenizer": false }, { "name": "Enindhilyagwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anindilyakwa", "iso_1_code": null, "iso_3_code": "aoi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "709", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngandi", "iso_1_code": null, "iso_3_code": "nid", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "710", + "scripts": [], + "own_tokenizer": false }, { "name": "Nunggubuyu", "iso_1_code": null, "iso_3_code": "nuy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "711", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "708", + "scripts": [], + "own_tokenizer": false }, { "name": "Gagudjuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gagadu", "iso_1_code": null, "iso_3_code": "gbu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "713", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "712", + "scripts": [], + "own_tokenizer": false }, { "name": "Gungaraganyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kungarakany", "iso_1_code": null, "iso_3_code": "ggk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "715", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "714", + "scripts": [], + "own_tokenizer": false }, { "name": "Gunwinggic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gunwinggu", "iso_1_code": null, "iso_3_code": "gup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "717", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunbarlang", "iso_1_code": null, "iso_3_code": "wlg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "718", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "716", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangarayic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mangarrayi", "iso_1_code": null, "iso_3_code": "mpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "720", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "719", + "scripts": [], + "own_tokenizer": false }, { "name": "Maran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yugul", "iso_1_code": null, "iso_3_code": "ygu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "722", + "scripts": [], + "own_tokenizer": false }, { "name": "Alawic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alawa", "iso_1_code": null, "iso_3_code": "alh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "724", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "723", + "scripts": [], + "own_tokenizer": false }, { "name": "Mara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Marra", "iso_1_code": null, "iso_3_code": "mec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "726", + "scripts": [], + "own_tokenizer": false }, { "name": "Wandarang", "iso_1_code": null, "iso_3_code": "wnd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "727", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "725", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "721", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngalkbun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dalabon", "iso_1_code": null, "iso_3_code": "ngk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "729", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "728", + "scripts": [], + "own_tokenizer": false }, { "name": "Rembargic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ngalakgan", "iso_1_code": null, "iso_3_code": "nig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "731", + "scripts": [], + "own_tokenizer": false }, { "name": "Rembarrnga", "iso_1_code": null, "iso_3_code": "rmb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "732", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "730", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagiman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wagiman", "iso_1_code": null, "iso_3_code": "waq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "734", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "733", + "scripts": [], + "own_tokenizer": false }, { "name": "Warayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Waray", "iso_1_code": null, "iso_3_code": "wrz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "736", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "735", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangmanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dagoman", "iso_1_code": null, "iso_3_code": "dgn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "738", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangman", "iso_1_code": null, "iso_3_code": "jng", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "739", + "scripts": [], + "own_tokenizer": false }, { "name": "Wardaman", "iso_1_code": null, "iso_3_code": "wrr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "740", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "737", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "700", + "scripts": [], + "own_tokenizer": false }, { "name": "Laragiyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Laragia", "iso_1_code": null, "iso_3_code": "lrg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "742", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "741", + "scripts": [], + "own_tokenizer": false }, { "name": "Limilngan-Wulna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Limilngan", "iso_1_code": null, "iso_3_code": "lmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "744", + "scripts": [], + "own_tokenizer": false }, { "name": "Wulna", "iso_1_code": null, "iso_3_code": "wux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "745", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "743", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyulnyulan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bardi", "iso_1_code": null, "iso_3_code": "bcj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "747", + "scripts": [], + "own_tokenizer": false }, { "name": "Djawi", "iso_1_code": null, "iso_3_code": "djw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "748", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyaberdyaber", "iso_1_code": null, "iso_3_code": "dyb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "749", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyugun", "iso_1_code": null, "iso_3_code": "dyd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "750", + "scripts": [], + "own_tokenizer": false }, { "name": "Nimanbur", "iso_1_code": null, "iso_3_code": "nmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "751", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyikina", "iso_1_code": null, "iso_3_code": "nyh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "752", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyulnyul", "iso_1_code": null, "iso_3_code": "nyv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "753", + "scripts": [], + "own_tokenizer": false }, { "name": "Warrwa", "iso_1_code": null, "iso_3_code": "wwr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "754", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngumbarl", "iso_1_code": null, "iso_3_code": "xnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "755", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawuru", "iso_1_code": null, "iso_3_code": "ywr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "756", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "746", + "scripts": [], + "own_tokenizer": false }, { "name": "Pama-Nyungan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bidhawal", "iso_1_code": null, "iso_3_code": "ihw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "758", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurnai", "iso_1_code": null, "iso_3_code": "unn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "759", + "scripts": [], + "own_tokenizer": false }, { "name": "Keerray-Woorroong", "iso_1_code": null, "iso_3_code": "wkr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "760", + "scripts": [], + "own_tokenizer": false }, { "name": "Bindal", "iso_1_code": null, "iso_3_code": "xbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "761", + "scripts": [], + "own_tokenizer": false }, { "name": "Bigambal", "iso_1_code": null, "iso_3_code": "xbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "762", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuyu", "iso_1_code": null, "iso_3_code": "yxu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "763", + "scripts": [], + "own_tokenizer": false }, { "name": "Arandic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andegerebinha", "iso_1_code": null, "iso_3_code": "adg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "765", + "scripts": [], + "own_tokenizer": false }, { "name": "Arrernte, Eastern", "iso_1_code": null, "iso_3_code": "aer", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "766", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Alyawarr", "iso_1_code": null, "iso_3_code": "aly", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "767", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Anmatyerr", "iso_1_code": null, "iso_3_code": "amx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "768", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Arrarnta, Western", "iso_1_code": null, "iso_3_code": "are", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "769", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ayerrerenge", "iso_1_code": null, "iso_3_code": "axe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "770", + "scripts": [], + "own_tokenizer": false }, { "name": "Aranda, Lower Southern", "iso_1_code": null, "iso_3_code": "axl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "771", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaytetye", "iso_1_code": null, "iso_3_code": "gbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "772", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "764", + "scripts": [], + "own_tokenizer": false }, { "name": "Baagandji", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Paakantyi", "iso_1_code": null, "iso_3_code": "drl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "774", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "773", + "scripts": [], + "own_tokenizer": false }, { "name": "Bandjalangic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bandjalang", "iso_1_code": null, "iso_3_code": "bdy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "776", + "scripts": [], + "own_tokenizer": false }, { "name": "Githabul", "iso_1_code": null, "iso_3_code": "gih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "777", + "scripts": [], + "own_tokenizer": false }, { "name": "Arakwal", "iso_1_code": null, "iso_3_code": "rkw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "778", + "scripts": [], + "own_tokenizer": false }, { "name": "Minjungbal", "iso_1_code": null, "iso_3_code": "xjb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "779", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "775", + "scripts": [], + "own_tokenizer": false }, { "name": "Bungandidj", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gunditjmara", "iso_1_code": null, "iso_3_code": "gjm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "781", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunganditj", "iso_1_code": null, "iso_3_code": "xbg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "782", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "780", + "scripts": [], + "own_tokenizer": false }, { "name": "Durubulic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jandai", "iso_1_code": null, "iso_3_code": "jan", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "784", + "scripts": [], + "own_tokenizer": false }, { "name": "Nunukul", "iso_1_code": null, "iso_3_code": "xnu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "785", + "scripts": [], + "own_tokenizer": false }, { "name": "Yagara", "iso_1_code": null, "iso_3_code": "yxg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "786", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "783", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyangadi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dyangadi", "iso_1_code": null, "iso_3_code": "dyn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "788", + "scripts": [], + "own_tokenizer": false }, { "name": "Nganyaywana", "iso_1_code": null, "iso_3_code": "nyx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "789", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "787", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyirbalic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dyirbal", "iso_1_code": null, "iso_3_code": "dbl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "791", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuru", "iso_1_code": null, "iso_3_code": "ljx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "792", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyawaygi", "iso_1_code": null, "iso_3_code": "nyt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "793", + "scripts": [], + "own_tokenizer": false }, { "name": "Warrgamay", "iso_1_code": null, "iso_3_code": "wgy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "794", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "790", + "scripts": [], + "own_tokenizer": false }, { "name": "Flinders Island", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Flinders Island", "iso_1_code": null, "iso_3_code": "fln", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "796", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "795", + "scripts": [], + "own_tokenizer": false }, { "name": "Galgadungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalkutung", "iso_1_code": null, "iso_3_code": "ktg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "798", + "scripts": [], + "own_tokenizer": false }, { "name": "Wakabunga", "iso_1_code": null, "iso_3_code": "wwb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "799", + "scripts": [], + "own_tokenizer": false }, { "name": "Yalarnnga", "iso_1_code": null, "iso_3_code": "ylr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "800", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "797", + "scripts": [], + "own_tokenizer": false }, { "name": "Gumbaynggiric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kumbainggar", "iso_1_code": null, "iso_3_code": "kgs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "802", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaygir", "iso_1_code": null, "iso_3_code": "xya", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "803", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "801", + "scripts": [], + "own_tokenizer": false }, { "name": "Guugu-Yimidhirr", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barrow Point", "iso_1_code": null, "iso_3_code": "bpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "805", + "scripts": [], + "own_tokenizer": false }, { "name": "Guugu Yimidhirr", "iso_1_code": null, "iso_3_code": "kky", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "806", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "804", + "scripts": [], + "own_tokenizer": false }, { "name": "Kala Lagaw Ya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kala Lagaw Ya", "iso_1_code": null, "iso_3_code": "mwp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "808", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "807", + "scripts": [], + "own_tokenizer": false }, { "name": "Karnic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karenggapa", "iso_1_code": null, "iso_3_code": "eaa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "810", + "scripts": [], + "own_tokenizer": false }, { "name": "Kungardutyi", "iso_1_code": null, "iso_3_code": "gdt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "811", + "scripts": [], + "own_tokenizer": false }, { "name": "Nhirrpi", "iso_1_code": null, "iso_3_code": "hrp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "812", + "scripts": [], + "own_tokenizer": false }, { "name": "Kungkari", "iso_1_code": null, "iso_3_code": "lku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "813", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngantangarra", "iso_1_code": null, "iso_3_code": "ntg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "814", + "scripts": [], + "own_tokenizer": false }, { "name": "Karuwali", "iso_1_code": null, "iso_3_code": "rxw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "815", + "scripts": [], + "own_tokenizer": false }, { "name": "Wangkayutyuru", "iso_1_code": null, "iso_3_code": "wky", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "816", + "scripts": [], + "own_tokenizer": false }, { "name": "Pirriya", "iso_1_code": null, "iso_3_code": "xpa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "817", + "scripts": [], + "own_tokenizer": false }, { "name": "Yarluyandi", "iso_1_code": null, "iso_3_code": "yry", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "818", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayawali", "iso_1_code": null, "iso_3_code": "yxa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "819", + "scripts": [], + "own_tokenizer": false }, { "name": "Karna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pirlatapa", "iso_1_code": null, "iso_3_code": "bxi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "821", + "scripts": [], + "own_tokenizer": false }, { "name": "Diyari", "iso_1_code": null, "iso_3_code": "dif", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "822", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngamini", "iso_1_code": null, "iso_3_code": "nmv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "823", + "scripts": [], + "own_tokenizer": false }, { "name": "Yandruwandha", "iso_1_code": null, "iso_3_code": "ynd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "824", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawarawarga", "iso_1_code": null, "iso_3_code": "yww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "825", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "820", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngura", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Garlali", "iso_1_code": null, "iso_3_code": "gll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "827", + "scripts": [], + "own_tokenizer": false }, { "name": "Badjiri", "iso_1_code": null, "iso_3_code": "jbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "828", + "scripts": [], + "own_tokenizer": false }, { "name": "Punthamara", "iso_1_code": null, "iso_3_code": "xpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "829", + "scripts": [], + "own_tokenizer": false }, { "name": "Wangkumara", "iso_1_code": null, "iso_3_code": "xwk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "830", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "826", + "scripts": [], + "own_tokenizer": false }, { "name": "Palku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arabana", "iso_1_code": null, "iso_3_code": "ard", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "832", + "scripts": [], + "own_tokenizer": false }, { "name": "Pitta Pitta", "iso_1_code": null, "iso_3_code": "pit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "833", + "scripts": [], + "own_tokenizer": false }, { "name": "Wangkangurru", "iso_1_code": null, "iso_3_code": "wgg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "834", + "scripts": [], + "own_tokenizer": false }, { "name": "Wanggamala", "iso_1_code": null, "iso_3_code": "wnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "835", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "831", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "809", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Daungwurrung", "iso_1_code": null, "iso_3_code": "dgw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "837", + "scripts": [], + "own_tokenizer": false }, { "name": "Djadjawurrung", "iso_1_code": null, "iso_3_code": "dja", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "838", + "scripts": [], + "own_tokenizer": false }, { "name": "Madhi Madhi", "iso_1_code": null, "iso_3_code": "dmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "839", + "scripts": [], + "own_tokenizer": false }, { "name": "Ladji Ladji", "iso_1_code": null, "iso_3_code": "llj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "840", + "scripts": [], + "own_tokenizer": false }, { "name": "Nari Nari", "iso_1_code": null, "iso_3_code": "rnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "841", + "scripts": [], + "own_tokenizer": false }, { "name": "Djabwurrung", "iso_1_code": null, "iso_3_code": "tjw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "842", + "scripts": [], + "own_tokenizer": false }, { "name": "Wergaia", "iso_1_code": null, "iso_3_code": "weg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "843", + "scripts": [], + "own_tokenizer": false }, { "name": "Wathawurrung", "iso_1_code": null, "iso_3_code": "wth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "844", + "scripts": [], + "own_tokenizer": false }, { "name": "Woiwurrung", "iso_1_code": null, "iso_3_code": "wyi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "845", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadi Wadi", "iso_1_code": null, "iso_3_code": "xwd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "846", + "scripts": [], + "own_tokenizer": false }, { "name": "Wemba Wemba", "iso_1_code": null, "iso_3_code": "xww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "847", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barababaraba", "iso_1_code": null, "iso_3_code": "rbp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "849", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "848", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "836", + "scripts": [], + "own_tokenizer": false }, { "name": "Lardil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lardil", "iso_1_code": null, "iso_3_code": "lbz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "851", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "850", + "scripts": [], + "own_tokenizer": false }, { "name": "Maric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bidjara", "iso_1_code": null, "iso_3_code": "bym", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "853", + "scripts": [], + "own_tokenizer": false }, { "name": "Biri", "iso_1_code": null, "iso_3_code": "bzr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "854", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhungaloo", "iso_1_code": null, "iso_3_code": "dhx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "855", + "scripts": [], + "own_tokenizer": false }, { "name": "Gugu Badhun", "iso_1_code": null, "iso_3_code": "gdc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "856", + "scripts": [], + "own_tokenizer": false }, { "name": "Gangulu", "iso_1_code": null, "iso_3_code": "gnl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "857", + "scripts": [], + "own_tokenizer": false }, { "name": "Guwamu", "iso_1_code": null, "iso_3_code": "gwu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "858", + "scripts": [], + "own_tokenizer": false }, { "name": "Gungabula", "iso_1_code": null, "iso_3_code": "gyf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "859", + "scripts": [], + "own_tokenizer": false }, { "name": "Gunya", "iso_1_code": null, "iso_3_code": "gyy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "860", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunggari", "iso_1_code": null, "iso_3_code": "kgl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "861", + "scripts": [], + "own_tokenizer": false }, { "name": "Yirandali", "iso_1_code": null, "iso_3_code": "ljw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "862", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadjigu", "iso_1_code": null, "iso_3_code": "wdu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "863", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadjabangayi", "iso_1_code": null, "iso_3_code": "wdy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "864", + "scripts": [], + "own_tokenizer": false }, { "name": "Warungu", "iso_1_code": null, "iso_3_code": "wrg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "865", + "scripts": [], + "own_tokenizer": false }, { "name": "Lower Burdekin", "iso_1_code": null, "iso_3_code": "xbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "866", + "scripts": [], + "own_tokenizer": false }, { "name": "Garingbal", "iso_1_code": null, "iso_3_code": "xgi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "867", + "scripts": [], + "own_tokenizer": false }, { "name": "Dharumbal", "iso_1_code": null, "iso_3_code": "xgm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "868", + "scripts": [], + "own_tokenizer": false }, { "name": "Guwa", "iso_1_code": null, "iso_3_code": "xgw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "869", + "scripts": [], + "own_tokenizer": false }, { "name": "Wotjobaluk", "iso_1_code": null, "iso_3_code": "xwt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "870", + "scripts": [], + "own_tokenizer": false }, { "name": "Yandjibara", "iso_1_code": null, "iso_3_code": "xyb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "871", + "scripts": [], + "own_tokenizer": false }, { "name": "Yanda", "iso_1_code": null, "iso_3_code": "yda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "872", + "scripts": [], + "own_tokenizer": false }, { "name": "Yiningayi", "iso_1_code": null, "iso_3_code": "ygi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "873", + "scripts": [], + "own_tokenizer": false }, { "name": "Margany", "iso_1_code": null, "iso_3_code": "zmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "874", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandandanyi", "iso_1_code": null, "iso_3_code": "zmk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "875", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "852", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mbara", "iso_1_code": null, "iso_3_code": "mvl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "877", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "876", + "scripts": [], + "own_tokenizer": false }, { "name": "Muruwaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Muruwari", "iso_1_code": null, "iso_3_code": "zmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "879", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "878", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarinyeric-Yithayithic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dadi Dadi", "iso_1_code": null, "iso_3_code": "dda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "881", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarrindjeri", "iso_1_code": null, "iso_3_code": "nay", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "882", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngayawung", "iso_1_code": null, "iso_3_code": "nwg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "883", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "880", + "scripts": [], + "own_tokenizer": false }, { "name": "Pallanganmiddang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dhudhuroa", "iso_1_code": null, "iso_3_code": "ddr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "885", + "scripts": [], + "own_tokenizer": false }, { "name": "Pallanganmiddang", "iso_1_code": null, "iso_3_code": "pmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "886", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "884", + "scripts": [], + "own_tokenizer": false }, { "name": "Paman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Olkol", "iso_1_code": null, "iso_3_code": "olk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "888", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagalaka", "iso_1_code": null, "iso_3_code": "tgz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "889", + "scripts": [], + "own_tokenizer": false }, { "name": "Gudang", "iso_1_code": null, "iso_3_code": "xgd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "890", + "scripts": [], + "own_tokenizer": false }, { "name": "Yatay", "iso_1_code": null, "iso_3_code": "yty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "891", + "scripts": [], + "own_tokenizer": false }, { "name": "Yinwum", "iso_1_code": null, "iso_3_code": "yxm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "892", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kunjen", "iso_1_code": null, "iso_3_code": "kjn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "894", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "893", + "scripts": [], + "own_tokenizer": false }, { "name": "Coastal Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Koko-Bera", "iso_1_code": null, "iso_3_code": "kkp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "896", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "895", + "scripts": [], + "own_tokenizer": false }, { "name": "Flinders Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gugadj", "iso_1_code": null, "iso_3_code": "ggd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "898", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "897", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamalamic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lamalama", "iso_1_code": null, "iso_3_code": "lby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "900", + "scripts": [], + "own_tokenizer": false }, { "name": "Morrobalama", "iso_1_code": null, "iso_3_code": "umg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "901", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "899", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayabic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ngawun", "iso_1_code": null, "iso_3_code": "nxn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "903", + "scripts": [], + "own_tokenizer": false }, { "name": "Wunumara", "iso_1_code": null, "iso_3_code": "wnn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "904", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayaguduna", "iso_1_code": null, "iso_3_code": "xmy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "905", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayi-Yapi", "iso_1_code": null, "iso_3_code": "xyj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "906", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayi-Kulan", "iso_1_code": null, "iso_3_code": "xyk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "907", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayi-Thakurti", "iso_1_code": null, "iso_3_code": "xyt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "908", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "902", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbariman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gugu Warra", "iso_1_code": null, "iso_3_code": "wrw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "910", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbariman-Gudhinma", "iso_1_code": null, "iso_3_code": "zmv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "911", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "909", + "scripts": [], + "own_tokenizer": false }, { "name": "Middle Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ayabadhu", "iso_1_code": null, "iso_3_code": "ayd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "913", + "scripts": [], + "own_tokenizer": false }, { "name": "Pakanha", "iso_1_code": null, "iso_3_code": "pkn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "914", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku-Ugbanh", "iso_1_code": null, "iso_3_code": "ugb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "915", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku-Uwanh", "iso_1_code": null, "iso_3_code": "uwa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "916", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik-Epa", "iso_1_code": null, "iso_3_code": "wie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "917", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik-Keyangan", "iso_1_code": null, "iso_3_code": "wif", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "918", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik Ngathan", "iso_1_code": null, "iso_3_code": "wig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "919", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik-Me\u2019anha", "iso_1_code": null, "iso_3_code": "wih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "920", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik-Iiyanh", "iso_1_code": null, "iso_3_code": "wij", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "921", + "scripts": [], + "own_tokenizer": false }, { "name": "Wikalkan", "iso_1_code": null, "iso_3_code": "wik", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "922", + "scripts": [], + "own_tokenizer": false }, { "name": "Wik-Mungkan", "iso_1_code": null, "iso_3_code": "wim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "923", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wikngenchera", "iso_1_code": null, "iso_3_code": "wua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "924", + "scripts": [], + "own_tokenizer": false }, { "name": "Kugu-Muminh", "iso_1_code": null, "iso_3_code": "xmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "925", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku-Mu\u2019inh", "iso_1_code": null, "iso_3_code": "xmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "926", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku-Mangk", "iso_1_code": null, "iso_3_code": "xmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "927", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "912", + "scripts": [], + "own_tokenizer": false }, { "name": "Norman Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Areba", "iso_1_code": null, "iso_3_code": "aea", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "929", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurdjar", "iso_1_code": null, "iso_3_code": "gdj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "930", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuthant", "iso_1_code": null, "iso_3_code": "xut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "931", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "928", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeastern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kanju", "iso_1_code": null, "iso_3_code": "kbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "933", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuuku-Ya\u2019u", "iso_1_code": null, "iso_3_code": "kuy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "934", + "scripts": [], + "own_tokenizer": false }, { "name": "Umpila", "iso_1_code": null, "iso_3_code": "ump", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "935", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "932", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alngith", "iso_1_code": null, "iso_3_code": "aid", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "937", + "scripts": [], + "own_tokenizer": false }, { "name": "Atampaya", "iso_1_code": null, "iso_3_code": "amz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "938", + "scripts": [], + "own_tokenizer": false }, { "name": "Angkamuthi", "iso_1_code": null, "iso_3_code": "avm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "939", + "scripts": [], + "own_tokenizer": false }, { "name": "Anguthimri", "iso_1_code": null, "iso_3_code": "awg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "940", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndra\u2019ngith", "iso_1_code": null, "iso_3_code": "dgt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "941", + "scripts": [], + "own_tokenizer": false }, { "name": "Adithinngithigh", "iso_1_code": null, "iso_3_code": "dth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "942", + "scripts": [], + "own_tokenizer": false }, { "name": "Awngthim", "iso_1_code": null, "iso_3_code": "gwm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "943", + "scripts": [], + "own_tokenizer": false }, { "name": "Leningitij", "iso_1_code": null, "iso_3_code": "lnj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "944", + "scripts": [], + "own_tokenizer": false }, { "name": "Arritinngithigh", "iso_1_code": null, "iso_3_code": "rrt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "945", + "scripts": [], + "own_tokenizer": false }, { "name": "Tjungundji", "iso_1_code": null, "iso_3_code": "tjj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "946", + "scripts": [], + "own_tokenizer": false }, { "name": "Uradhi", "iso_1_code": null, "iso_3_code": "urf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "947", + "scripts": [], + "own_tokenizer": false }, { "name": "Mpalitjanh", "iso_1_code": null, "iso_3_code": "xpj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "936", + "scripts": [], + "own_tokenizer": false }, { "name": "Rarmul Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aghu-Tharnggala", "iso_1_code": null, "iso_3_code": "gtu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "950", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikaranggal", "iso_1_code": null, "iso_3_code": "ikr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "951", + "scripts": [], + "own_tokenizer": false }, { "name": "Thaypan", "iso_1_code": null, "iso_3_code": "typ", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "952", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "949", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barbaram", "iso_1_code": null, "iso_3_code": "vmb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "954", + "scripts": [], + "own_tokenizer": false }, { "name": "Wamin", "iso_1_code": null, "iso_3_code": "wmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "955", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "953", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kok-Nar", "iso_1_code": null, "iso_3_code": "gko", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "957", + "scripts": [], + "own_tokenizer": false }, { "name": "Koko Babangk", "iso_1_code": null, "iso_3_code": "okg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "958", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuuk-Yak", "iso_1_code": null, "iso_3_code": "uky", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "959", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "956", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbindhamuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Umbindhamu", "iso_1_code": null, "iso_3_code": "umd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "961", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "960", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuuk Thayorre", "iso_1_code": null, "iso_3_code": "thd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "963", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "962", + "scripts": [], + "own_tokenizer": false }, { "name": "Yir Yoront", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yirrk-Mel", "iso_1_code": null, "iso_3_code": "yrm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "965", + "scripts": [], + "own_tokenizer": false }, { "name": "Yir-Yoront", "iso_1_code": null, "iso_3_code": "yyr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "966", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "964", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "887", + "scripts": [], + "own_tokenizer": false }, { "name": "South-West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalaamaya", "iso_1_code": null, "iso_3_code": "lkm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "968", + "scripts": [], + "own_tokenizer": false }, { "name": "Walangama", "iso_1_code": null, "iso_3_code": "nlw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "969", + "scripts": [], + "own_tokenizer": false }, { "name": "Coastal Ngayarda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djiwarli", "iso_1_code": null, "iso_3_code": "dze", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "971", + "scripts": [], + "own_tokenizer": false }, { "name": "Thiin", "iso_1_code": null, "iso_3_code": "iin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "972", + "scripts": [], + "own_tokenizer": false }, { "name": "Nhuwala", "iso_1_code": null, "iso_3_code": "nhf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "973", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarluma", "iso_1_code": null, "iso_3_code": "nrl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "974", + "scripts": [], + "own_tokenizer": false }, { "name": "Kariyarra", "iso_1_code": null, "iso_3_code": "vka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "975", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurrama", "iso_1_code": null, "iso_3_code": "vku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "976", + "scripts": [], + "own_tokenizer": false }, { "name": "Martuyhunira", "iso_1_code": null, "iso_3_code": "vma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "977", + "scripts": [], + "own_tokenizer": false }, { "name": "Yindjibarndi", "iso_1_code": null, "iso_3_code": "yij", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "978", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "970", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhalandji", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dhalandji", "iso_1_code": null, "iso_3_code": "dhl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "980", + "scripts": [], + "own_tokenizer": false }, { "name": "Pinigura", "iso_1_code": null, "iso_3_code": "pnv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "981", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "979", + "scripts": [], + "own_tokenizer": false }, { "name": "Inland Ngayarda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dhargari", "iso_1_code": null, "iso_3_code": "dhr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "983", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyamal", "iso_1_code": null, "iso_3_code": "nly", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "984", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarla", "iso_1_code": null, "iso_3_code": "nrk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "985", + "scripts": [], + "own_tokenizer": false }, { "name": "Banyjima", "iso_1_code": null, "iso_3_code": "pnw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "986", + "scripts": [], + "own_tokenizer": false }, { "name": "Tjurruru", "iso_1_code": null, "iso_3_code": "tju", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "987", + "scripts": [], + "own_tokenizer": false }, { "name": "Wariyangga", "iso_1_code": null, "iso_3_code": "wri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "988", + "scripts": [], + "own_tokenizer": false }, { "name": "Yinhawangka", "iso_1_code": null, "iso_3_code": "ywg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "989", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "982", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanyara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bayungu", "iso_1_code": null, "iso_3_code": "bxj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "991", + "scripts": [], + "own_tokenizer": false }, { "name": "Burduna", "iso_1_code": null, "iso_3_code": "bxn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "990", + "scripts": [], + "own_tokenizer": false }, { "name": "Malgana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Malgana", "iso_1_code": null, "iso_3_code": "vml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "994", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "993", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangala", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mangala", "iso_1_code": null, "iso_3_code": "mem", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "996", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "995", + "scripts": [], + "own_tokenizer": false }, { "name": "Marngu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karajarri", "iso_1_code": null, "iso_3_code": "gbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "998", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyangumarta", "iso_1_code": null, "iso_3_code": "nna", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "999", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "997", + "scripts": [], + "own_tokenizer": false }, { "name": "Mirning", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mirning", "iso_1_code": null, "iso_3_code": "gmr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1001", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalarko", "iso_1_code": null, "iso_3_code": "kba", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1002", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngadjunmaya", "iso_1_code": null, "iso_3_code": "nju", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1003", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1000", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Warlpiri", "iso_1_code": null, "iso_3_code": "wbp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1005", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Warlmanpa", "iso_1_code": null, "iso_3_code": "wrl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1006", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1004", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngumbin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jaru", "iso_1_code": null, "iso_3_code": "ddj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1008", + "scripts": [], + "own_tokenizer": false }, { "name": "Mudburra", "iso_1_code": null, "iso_3_code": "dmw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1009", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurindji", "iso_1_code": null, "iso_3_code": "gue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1010", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarinyman", "iso_1_code": null, "iso_3_code": "nbj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1011", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngardi", "iso_1_code": null, "iso_3_code": "rxd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1012", + "scripts": [], + "own_tokenizer": false }, { "name": "Walmajarri", "iso_1_code": null, "iso_3_code": "wmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1013", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1007", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyungar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nyungar", "iso_1_code": null, "iso_3_code": "nys", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1015", + "scripts": [], + "own_tokenizer": false }, { "name": "Pinjarup", "iso_1_code": null, "iso_3_code": "pnj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1016", + "scripts": [], + "own_tokenizer": false }, { "name": "Wardandi", "iso_1_code": null, "iso_3_code": "wxw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1017", + "scripts": [], + "own_tokenizer": false }, { "name": "Bibbulman", "iso_1_code": null, "iso_3_code": "xbp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1018", + "scripts": [], + "own_tokenizer": false }, { "name": "Goreng", "iso_1_code": null, "iso_3_code": "xgg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1019", + "scripts": [], + "own_tokenizer": false }, { "name": "Nganakarti", "iso_1_code": null, "iso_3_code": "xnk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1020", + "scripts": [], + "own_tokenizer": false }, { "name": "Minang", "iso_1_code": null, "iso_3_code": "xrg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1021", + "scripts": [], + "own_tokenizer": false }, { "name": "Wajuk", "iso_1_code": null, "iso_3_code": "xwj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1022", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1014", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadjari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Badimaya", "iso_1_code": null, "iso_3_code": "bia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1024", + "scripts": [], + "own_tokenizer": false }, { "name": "Wajarri", "iso_1_code": null, "iso_3_code": "wbv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1025", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1023", + "scripts": [], + "own_tokenizer": false }, { "name": "Wati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Antikarinya", "iso_1_code": null, "iso_3_code": "ant", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1027", + "scripts": [], + "own_tokenizer": false }, { "name": "Yankunytjatjara", "iso_1_code": null, "iso_3_code": "kdd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1028", + "scripts": [], + "own_tokenizer": false }, { "name": "Kokata", "iso_1_code": null, "iso_3_code": "ktd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1029", + "scripts": [], + "own_tokenizer": false }, { "name": "Kukatja", "iso_1_code": null, "iso_3_code": "kux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1030", + "scripts": [], + "own_tokenizer": false }, { "name": "Martu Wangka", "iso_1_code": null, "iso_3_code": "mpj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1031", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngaanyatjarra", "iso_1_code": null, "iso_3_code": "ntj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1032", + "scripts": [], + "own_tokenizer": false }, { "name": "Pintupi-Luritja", "iso_1_code": null, "iso_3_code": "piu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1033", + "scripts": [], + "own_tokenizer": false }, { "name": "Pitjantjatjara", "iso_1_code": null, "iso_3_code": "pjt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1034", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pintiini", "iso_1_code": null, "iso_3_code": "pti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1035", + "scripts": [], + "own_tokenizer": false }, { "name": "Tjupany", "iso_1_code": null, "iso_3_code": "tjp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1036", + "scripts": [], + "own_tokenizer": false }, { "name": "Warnman", "iso_1_code": null, "iso_3_code": "wbt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1037", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiyaparli", "iso_1_code": null, "iso_3_code": "xny", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1026", + "scripts": [], + "own_tokenizer": false }, { "name": "Yinggarda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nhanda", "iso_1_code": null, "iso_3_code": "nha", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1040", + "scripts": [], + "own_tokenizer": false }, { "name": "Yinggarda", "iso_1_code": null, "iso_3_code": "yia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1041", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1039", + "scripts": [], + "own_tokenizer": false }, { "name": "Yura", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Adnyamathanha", "iso_1_code": null, "iso_3_code": "adt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1043", + "scripts": [], + "own_tokenizer": false }, { "name": "Barngarla", "iso_1_code": null, "iso_3_code": "bjb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1044", + "scripts": [], + "own_tokenizer": false }, { "name": "Guyani", "iso_1_code": null, "iso_3_code": "gvy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1045", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngadjuri", "iso_1_code": null, "iso_3_code": "jui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1046", + "scripts": [], + "own_tokenizer": false }, { "name": "Narungga", "iso_1_code": null, "iso_3_code": "nnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1047", + "scripts": [], + "own_tokenizer": false }, { "name": "Nugunu", "iso_1_code": null, "iso_3_code": "nnv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1048", + "scripts": [], + "own_tokenizer": false }, { "name": "Nauo", "iso_1_code": null, "iso_3_code": "nwo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1049", + "scripts": [], + "own_tokenizer": false }, { "name": "Wirangu", "iso_1_code": null, "iso_3_code": "wgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1050", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaurna", "iso_1_code": null, "iso_3_code": "zku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1051", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1042", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "967", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagaya-Warluwaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Warluwara-Thawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wagaya", "iso_1_code": null, "iso_3_code": "wga", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1054", + "scripts": [], + "own_tokenizer": false }, { "name": "Waluwarra", "iso_1_code": null, "iso_3_code": "wrb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1055", + "scripts": [], + "own_tokenizer": false }, { "name": "Yindjilandji", "iso_1_code": null, "iso_3_code": "yil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1056", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1053", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1052", + "scripts": [], + "own_tokenizer": false }, { "name": "Waka-Kabic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gabi-Gabi", "iso_1_code": null, "iso_3_code": "gbw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1058", + "scripts": [], + "own_tokenizer": false }, { "name": "Batjala", "iso_1_code": null, "iso_3_code": "xby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1059", + "scripts": [], + "own_tokenizer": false }, { "name": "Kingkel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bayali", "iso_1_code": null, "iso_3_code": "bjy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1061", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1060", + "scripts": [], + "own_tokenizer": false }, { "name": "Miyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wakawaka", "iso_1_code": null, "iso_3_code": "wkw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1063", + "scripts": [], + "own_tokenizer": false }, { "name": "Wuliwuli", "iso_1_code": null, "iso_3_code": "wlu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1064", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1062", + "scripts": [], + "own_tokenizer": false }, { "name": "Than", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gureng Gureng", "iso_1_code": null, "iso_3_code": "gnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1066", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1065", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1057", + "scripts": [], + "own_tokenizer": false }, { "name": "Warumungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Warumungu", "iso_1_code": null, "iso_3_code": "wrm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1068", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1067", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiradhuric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gamilaraay", "iso_1_code": null, "iso_3_code": "kld", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1070", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiradjuri", "iso_1_code": null, "iso_3_code": "wrh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1071", + "scripts": [], + "own_tokenizer": false }, { "name": "Wangaaybuwan-Ngiyambaa", "iso_1_code": null, "iso_3_code": "wyb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1072", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1069", + "scripts": [], + "own_tokenizer": false }, { "name": "Worimi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awabakal", "iso_1_code": null, "iso_3_code": "awk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1074", + "scripts": [], + "own_tokenizer": false }, { "name": "Worimi", "iso_1_code": null, "iso_3_code": "kda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1075", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1073", + "scripts": [], + "own_tokenizer": false }, { "name": "Yalandyic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djangun", "iso_1_code": null, "iso_3_code": "djf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1077", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku-Yalanji", "iso_1_code": null, "iso_3_code": "gvn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1078", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Muluridyi", "iso_1_code": null, "iso_3_code": "vmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1076", + "scripts": [], + "own_tokenizer": false }, { "name": "Yanyuwan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yanyuwa", "iso_1_code": null, "iso_3_code": "jao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1081", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1080", + "scripts": [], + "own_tokenizer": false }, { "name": "Yarli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wadikali", "iso_1_code": null, "iso_3_code": "wdk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1083", + "scripts": [], + "own_tokenizer": false }, { "name": "Malyangapa", "iso_1_code": null, "iso_3_code": "yga", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1084", + "scripts": [], + "own_tokenizer": false }, { "name": "Yardliyawarra", "iso_1_code": null, "iso_3_code": "yxl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1085", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1082", + "scripts": [], + "own_tokenizer": false }, { "name": "Yidinic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djabugay", "iso_1_code": null, "iso_3_code": "dyy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1087", + "scripts": [], + "own_tokenizer": false }, { "name": "Yidiny", "iso_1_code": null, "iso_3_code": "yii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1088", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1086", + "scripts": [], + "own_tokenizer": false }, { "name": "Yotayotic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yorta Yorta", "iso_1_code": null, "iso_3_code": "xyy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1090", + "scripts": [], + "own_tokenizer": false }, { "name": "Yabula Yabula", "iso_1_code": null, "iso_3_code": "yxy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1091", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1089", + "scripts": [], + "own_tokenizer": false }, { "name": "Yugambal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yugambal", "iso_1_code": null, "iso_3_code": "yub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1093", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1092", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dhurga", "iso_1_code": null, "iso_3_code": "dhu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1095", + "scripts": [], + "own_tokenizer": false }, { "name": "Dharawal", "iso_1_code": null, "iso_3_code": "tbh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1096", + "scripts": [], + "own_tokenizer": false }, { "name": "Birrpayi", "iso_1_code": null, "iso_3_code": "xbj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1097", + "scripts": [], + "own_tokenizer": false }, { "name": "Darkinyung", "iso_1_code": null, "iso_3_code": "xda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1098", + "scripts": [], + "own_tokenizer": false }, { "name": "Dharuk", "iso_1_code": null, "iso_3_code": "xdk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1099", + "scripts": [], + "own_tokenizer": false }, { "name": "Jaitmatang", "iso_1_code": null, "iso_3_code": "xjt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1100", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarigu", "iso_1_code": null, "iso_3_code": "xni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1101", + "scripts": [], + "own_tokenizer": false }, { "name": "Gundungurra", "iso_1_code": null, "iso_3_code": "xrd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1102", + "scripts": [], + "own_tokenizer": false }, { "name": "Thawa", "iso_1_code": null, "iso_3_code": "xtv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1103", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngunawal", "iso_1_code": null, "iso_3_code": "xul", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1104", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1094", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuulngu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Golpa", "iso_1_code": null, "iso_3_code": "lja", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1106", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhangu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dhangu-Djangu", "iso_1_code": null, "iso_3_code": "dhg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1108", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yan-nhangu", "iso_1_code": null, "iso_3_code": "jay", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1109", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1107", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhuwal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dayi", "iso_1_code": null, "iso_3_code": "dax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1111", + "scripts": [], + "own_tokenizer": false }, { "name": "Djambarrpuyngu", "iso_1_code": null, "iso_3_code": "djr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1112", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dhuwal", "iso_1_code": null, "iso_3_code": "dwu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1113", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhuwaya", "iso_1_code": null, "iso_3_code": "dwy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1114", + "scripts": [], + "own_tokenizer": false }, { "name": "Gumatj", "iso_1_code": null, "iso_3_code": "gnn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1115", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gupapuyngu", "iso_1_code": null, "iso_3_code": "guf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1116", + "scripts": [], + "own_tokenizer": false }, { "name": "Ritharrngu", "iso_1_code": null, "iso_3_code": "rit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1117", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1110", + "scripts": [], + "own_tokenizer": false }, { "name": "Djinang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Djinba", "iso_1_code": null, "iso_3_code": "djb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1119", + "scripts": [], + "own_tokenizer": false }, { "name": "Djinang", "iso_1_code": null, "iso_3_code": "dji", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1120", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1118", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1105", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "757", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ganggalida", "iso_1_code": null, "iso_3_code": "gcd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1122", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayardild", "iso_1_code": null, "iso_3_code": "gyd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1123", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyangga", "iso_1_code": null, "iso_3_code": "nny", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1124", + "scripts": [], + "own_tokenizer": false }, { "name": "Minkin", "iso_1_code": null, "iso_3_code": "xxm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1125", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1121", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiwian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tiwi", "iso_1_code": null, "iso_3_code": "tiw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1127", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1126", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbugarla-Ngumbur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ngurmbur", "iso_1_code": null, "iso_3_code": "nrx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1129", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbugarla", "iso_1_code": null, "iso_3_code": "umr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1130", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1128", + "scripts": [], + "own_tokenizer": false }, { "name": "West Barkly", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jingulu", "iso_1_code": null, "iso_3_code": "jig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1132", + "scripts": [], + "own_tokenizer": false }, { "name": "Gudanji", "iso_1_code": null, "iso_3_code": "nji", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1133", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambaya", "iso_1_code": null, "iso_3_code": "wmb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1134", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1131", + "scripts": [], + "own_tokenizer": false }, { "name": "Worrorran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gambera", "iso_1_code": null, "iso_3_code": "gma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1136", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwini", "iso_1_code": null, "iso_3_code": "gww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1137", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarinyin", "iso_1_code": null, "iso_3_code": "ung", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1138", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwa", "iso_1_code": null, "iso_3_code": "vmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1139", + "scripts": [], + "own_tokenizer": false }, { "name": "Wilawila", "iso_1_code": null, "iso_3_code": "wil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1140", + "scripts": [], + "own_tokenizer": false }, { "name": "Wunambal", "iso_1_code": null, "iso_3_code": "wub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1141", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngarinyinic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andajin", "iso_1_code": null, "iso_3_code": "ajn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1143", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1142", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Worrorran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yawijibaya", "iso_1_code": null, "iso_3_code": "jbw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1145", + "scripts": [], + "own_tokenizer": false }, { "name": "Worrorra", "iso_1_code": null, "iso_3_code": "wro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1146", + "scripts": [], + "own_tokenizer": false }, { "name": "Unggumi", "iso_1_code": null, "iso_3_code": "xgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1147", + "scripts": [], + "own_tokenizer": false }, { "name": "Umiida", "iso_1_code": null, "iso_3_code": "xud", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1148", + "scripts": [], + "own_tokenizer": false }, { "name": "Unggaranggu", "iso_1_code": null, "iso_3_code": "xun", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1149", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1144", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1135", + "scripts": [], + "own_tokenizer": false }, { "name": "Yanyi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Waanyi", "iso_1_code": null, "iso_3_code": "wny", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1151", + "scripts": [], + "own_tokenizer": false }, { "name": "Garrwa", "iso_1_code": null, "iso_3_code": "wrk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1152", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1150", + "scripts": [], + "own_tokenizer": false }, { "name": "Yiwaidjan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amaragic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amurdak", "iso_1_code": null, "iso_3_code": "amg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1154", + "scripts": [], + "own_tokenizer": false }, { "name": "Margic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Margu", "iso_1_code": null, "iso_3_code": "mhg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1157", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1156", + "scripts": [], + "own_tokenizer": false }, { "name": "Yiwaidjic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Iwaidja", "iso_1_code": null, "iso_3_code": "ibd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1159", + "scripts": [], + "own_tokenizer": false }, { "name": "Garig-Ilgar", "iso_1_code": null, "iso_3_code": "ilg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1160", + "scripts": [], + "own_tokenizer": false }, { "name": "Maung", "iso_1_code": null, "iso_3_code": "mph", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1161", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manangkari", "iso_1_code": null, "iso_3_code": "znk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1162", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1158", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1153", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "656", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Austro-Asiatic.json b/data/Austro-Asiatic.json index cfc0a01809947594a0f1df73cfd07cdd5702b288..89525eb3f2583a450beda3bd14b663d7cbcdff23 100644 --- a/data/Austro-Asiatic.json +++ b/data/Austro-Asiatic.json @@ -2,3108 +2,4133 @@ "name": "Austro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": "jah", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1167", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1166", + "scripts": [], + "own_tokenizer": false }, { "name": "North Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chewong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cheq Wong", "iso_1_code": null, "iso_3_code": "cwg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1170", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1169", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Batek", "iso_1_code": null, "iso_3_code": "btq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1172", + "scripts": [], + "own_tokenizer": false }, { "name": "Jehai", "iso_1_code": null, "iso_3_code": "jhi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1173", + "scripts": [], + "own_tokenizer": false }, { "name": "Minriq", "iso_1_code": null, "iso_3_code": "mnq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1174", + "scripts": [], + "own_tokenizer": false }, { "name": "Mintil", "iso_1_code": null, "iso_3_code": "mzt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1175", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1171", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ten\u2019edn", "iso_1_code": null, "iso_3_code": "tnz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1177", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1176", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kintaq", "iso_1_code": null, "iso_3_code": "knq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1179", + "scripts": [], + "own_tokenizer": false }, { "name": "Kensiu", "iso_1_code": null, "iso_3_code": "kns", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1180", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1178", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1168", + "scripts": [], + "own_tokenizer": false }, { "name": "Senoic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lanoh", "iso_1_code": null, "iso_3_code": "lnh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1182", + "scripts": [], + "own_tokenizer": false }, { "name": "Sab\u00fcm", "iso_1_code": null, "iso_3_code": "sbo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1183", + "scripts": [], + "own_tokenizer": false }, { "name": "Semai", "iso_1_code": null, "iso_3_code": "sea", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1184", + "scripts": [], + "own_tokenizer": false }, { "name": "Semnam", "iso_1_code": null, "iso_3_code": "ssm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1185", + "scripts": [], + "own_tokenizer": false }, { "name": "Temiar", "iso_1_code": null, "iso_3_code": "tea", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1186", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1181", + "scripts": [], + "own_tokenizer": false }, { "name": "South Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mah Meri", "iso_1_code": null, "iso_3_code": "mhe", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1188", + "scripts": [], + "own_tokenizer": false }, { "name": "Semelai", "iso_1_code": null, "iso_3_code": "sza", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1189", + "scripts": [], + "own_tokenizer": false }, { "name": "Semaq Beri", "iso_1_code": null, "iso_3_code": "szc", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1190", + "scripts": [], + "own_tokenizer": false }, { "name": "Temoq", "iso_1_code": null, "iso_3_code": "tmo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1191", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1187", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1165", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Alak", "iso_1_code": null, "iso_3_code": "alk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1195", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahnar", "iso_1_code": null, "iso_3_code": "bdq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1196", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romam", "iso_1_code": null, "iso_3_code": "rmx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1197", + "scripts": [], + "own_tokenizer": false }, { "name": "Tampuan", "iso_1_code": null, "iso_3_code": "tpu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1194", + "scripts": [], + "own_tokenizer": false }, { "name": "East Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cua", "iso_1_code": null, "iso_3_code": "cua", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1200", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1199", + "scripts": [], + "own_tokenizer": false }, { "name": "North Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katua", "iso_1_code": null, "iso_3_code": "kta", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1202", + "scripts": [], + "own_tokenizer": false }, { "name": "Kachok", "iso_1_code": null, "iso_3_code": "xkk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1203", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kayong", "iso_1_code": null, "iso_3_code": "kxy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1205", + "scripts": [], + "own_tokenizer": false }, { "name": "Takua", "iso_1_code": null, "iso_3_code": "tkz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1206", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1204", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Trieng", "iso_1_code": null, "iso_3_code": "stg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1208", + "scripts": [], + "own_tokenizer": false }, { "name": "Talieng", "iso_1_code": null, "iso_3_code": "tdf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1209", + "scripts": [], + "own_tokenizer": false }, { "name": "Duan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Halang Doan", "iso_1_code": null, "iso_3_code": "hld", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1210", + "scripts": [], + "own_tokenizer": false }, { "name": "Jeh-Halang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Halang", "iso_1_code": null, "iso_3_code": "hal", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1213", + "scripts": [], + "own_tokenizer": false }, { "name": "Jeh", "iso_1_code": null, "iso_3_code": "jeh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1212", + "scripts": [], + "own_tokenizer": false }, { "name": "Rengao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rengao", "iso_1_code": null, "iso_3_code": "ren", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1216", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1215", + "scripts": [], + "own_tokenizer": false }, { "name": "Sedang-Todrah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sedang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hre", "iso_1_code": null, "iso_3_code": "hre", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1219", + "scripts": [], + "own_tokenizer": false }, { "name": "Sedang", "iso_1_code": null, "iso_3_code": "sed", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1220", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1218", + "scripts": [], + "own_tokenizer": false }, { "name": "Todrah-Monom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Monom", "iso_1_code": null, "iso_3_code": "moo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1222", + "scripts": [], + "own_tokenizer": false }, { "name": "Todrah", "iso_1_code": null, "iso_3_code": "tdr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1223", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1221", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1217", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1207", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1201", + "scripts": [], + "own_tokenizer": false }, { "name": "South Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Stieng, Budeh", "iso_1_code": null, "iso_3_code": "stt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1225", + "scripts": [], + "own_tokenizer": false }, { "name": "Sre-Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong, Eastern", "iso_1_code": null, "iso_3_code": "mng", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1229", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1228", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern-Central Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mnong, Central", "iso_1_code": null, "iso_3_code": "cmo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1231", + "scripts": [ + "Latn", + "Khmr" + ], + "own_tokenizer": false }, { "name": "Mnong, Southern", "iso_1_code": null, "iso_3_code": "mnn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1232", + "scripts": [], + "own_tokenizer": false }, { "name": "Kraol", "iso_1_code": null, "iso_3_code": "rka", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1233", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1227", + "scripts": [], + "own_tokenizer": false }, { "name": "Sre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Maa", "iso_1_code": null, "iso_3_code": "cma", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1235", + "scripts": [], + "own_tokenizer": false }, { "name": "Koho", "iso_1_code": null, "iso_3_code": "kpm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1234", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1226", + "scripts": [], + "own_tokenizer": false }, { "name": "Stieng-Chrau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chrau", "iso_1_code": null, "iso_3_code": "crw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1238", + "scripts": [], + "own_tokenizer": false }, { "name": "Mel-Khaonh", "iso_1_code": null, "iso_3_code": "hkn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1239", + "scripts": [], + "own_tokenizer": false }, { "name": "Stieng, Bulo", "iso_1_code": null, "iso_3_code": "sti", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1224", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lavi", "iso_1_code": null, "iso_3_code": "lvi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1242", + "scripts": [], + "own_tokenizer": false }, { "name": "Brao-Kravet", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Brao", "iso_1_code": null, "iso_3_code": "brb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1244", + "scripts": [], + "own_tokenizer": false }, { "name": "Krung", "iso_1_code": null, "iso_3_code": "krr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1245", + "scripts": [], + "own_tokenizer": false }, { "name": "Kavet", "iso_1_code": null, "iso_3_code": "krv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1246", + "scripts": [], + "own_tokenizer": false }, { "name": "Sou", "iso_1_code": null, "iso_3_code": "sqq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1247", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1243", + "scripts": [], + "own_tokenizer": false }, { "name": "Laven", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Laven", "iso_1_code": null, "iso_3_code": "lbo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1248", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": "nev", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1250", + "scripts": [], + "own_tokenizer": false }, { "name": "Oi-The", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oy", "iso_1_code": null, "iso_3_code": "oyb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1253", + "scripts": [], + "own_tokenizer": false }, { "name": "Sapuan", "iso_1_code": null, "iso_3_code": "spu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1241", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1193", + "scripts": [], + "own_tokenizer": false }, { "name": "Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ta\u2019oih", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ir", "iso_1_code": null, "iso_3_code": "irr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1258", + "scripts": [], + "own_tokenizer": false }, { "name": "Ong", "iso_1_code": null, "iso_3_code": "oog", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1259", + "scripts": [], + "own_tokenizer": false }, { "name": "Ta\u2019oih, Upper", "iso_1_code": null, "iso_3_code": "tth", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1260", + "scripts": [], + "own_tokenizer": false }, { "name": "Ta\u2019oih, Lower", "iso_1_code": null, "iso_3_code": "tto", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1261", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1257", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1256", + "scripts": [], + "own_tokenizer": false }, { "name": "East Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katu-Pacoh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katu, Eastern", "iso_1_code": null, "iso_3_code": "ktv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1264", + "scripts": [], + "own_tokenizer": false }, { "name": "Katu, Western", "iso_1_code": null, "iso_3_code": "kuf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1265", + "scripts": [], + "own_tokenizer": false }, { "name": "Pacoh", "iso_1_code": null, "iso_3_code": "pac", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1266", + "scripts": [], + "own_tokenizer": false }, { "name": "Phuong", "iso_1_code": null, "iso_3_code": "phg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1267", + "scripts": [], + "own_tokenizer": false }, { "name": "Tareng", "iso_1_code": null, "iso_3_code": "tgr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1268", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1263", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngeq-Nkriang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kriang", "iso_1_code": null, "iso_3_code": "ngt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1270", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1269", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1262", + "scripts": [], + "own_tokenizer": false }, { "name": "West Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bru, Eastern", "iso_1_code": null, "iso_3_code": "bru", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1273", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bru, Western", "iso_1_code": null, "iso_3_code": "brv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1274", + "scripts": [], + "own_tokenizer": false }, { "name": "Katang, Northern", "iso_1_code": null, "iso_3_code": "ncq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1275", + "scripts": [ + "Laoo" + ], + "own_tokenizer": false }, { "name": "Katang, Southern", "iso_1_code": null, "iso_3_code": "sct", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1276", + "scripts": [], + "own_tokenizer": false }, { "name": "So", "iso_1_code": null, "iso_3_code": "sss", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1277", + "scripts": [], + "own_tokenizer": false }, { "name": "Khua", "iso_1_code": null, "iso_3_code": "xhv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1278", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1272", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuay", "iso_1_code": null, "iso_3_code": "kdt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1280", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyeu", "iso_1_code": null, "iso_3_code": "nyl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1279", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1271", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1255", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khmer", "iso_1_code": "km", "iso_3_code": "khm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1283", + "scripts": [ + "Khmr" + ], + "own_tokenizer": false }, { "name": "Khmer, Northern", "iso_1_code": null, "iso_3_code": "kxm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1284", + "scripts": [ + "Thai" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1282", + "scripts": [], + "own_tokenizer": false }, { "name": "Pearic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pear", "iso_1_code": null, "iso_3_code": "pcb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1287", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1286", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": "cog", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1290", + "scripts": [], + "own_tokenizer": false }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "scq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1291", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1289", + "scripts": [], + "own_tokenizer": false }, { "name": "Samre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Somray", "iso_1_code": null, "iso_3_code": "smu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1293", + "scripts": [], + "own_tokenizer": false }, { "name": "Samre", "iso_1_code": null, "iso_3_code": "sxm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1294", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1292", + "scripts": [], + "own_tokenizer": false }, { "name": "Suoy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Su\u2019ung", "iso_1_code": null, "iso_3_code": "syo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1296", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1288", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1285", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1192", + "scripts": [], + "own_tokenizer": false }, { "name": "Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mon", "iso_1_code": null, "iso_3_code": "mnw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1298", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1297", + "scripts": [], + "own_tokenizer": false }, { "name": "Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Car", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Car", "iso_1_code": null, "iso_3_code": "caq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1301", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1300", + "scripts": [], + "own_tokenizer": false }, { "name": "Chowra-Teressa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chaura", "iso_1_code": null, "iso_3_code": "crv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1303", + "scripts": [], + "own_tokenizer": false }, { "name": "Teressa", "iso_1_code": null, "iso_3_code": "tef", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1304", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1302", + "scripts": [], + "own_tokenizer": false }, { "name": "Great Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Southern", "iso_1_code": null, "iso_3_code": "nik", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1305", + "scripts": [], + "own_tokenizer": false }, { "name": "Nancowry", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nicobarese, Central", "iso_1_code": null, "iso_3_code": "ncb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1308", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1307", + "scripts": [], + "own_tokenizer": false }, { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": "sii", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1310", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1309", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1299", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khasian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "War-Jaintia", "iso_1_code": null, "iso_3_code": "aml", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1313", + "scripts": [], + "own_tokenizer": false }, { "name": "Khasi", "iso_1_code": null, "iso_3_code": "kha", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1314", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lyngngam", "iso_1_code": null, "iso_3_code": "lyg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1315", + "scripts": [], + "own_tokenizer": false }, { "name": "Pnar", "iso_1_code": null, "iso_3_code": "pbv", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1316", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1312", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": "xao", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1318", + "scripts": [], + "own_tokenizer": false }, { "name": "Mal-Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khuen", "iso_1_code": null, "iso_3_code": "khf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1322", + "scripts": [], + "own_tokenizer": false }, { "name": "Khmu", "iso_1_code": null, "iso_3_code": "kjg", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1323", + "scripts": [], + "own_tokenizer": false }, { "name": "O\u2019du", "iso_1_code": null, "iso_3_code": "tyh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1321", + "scripts": [], + "own_tokenizer": false }, { "name": "Mal-Prai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mal", "iso_1_code": null, "iso_3_code": "mlf", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1326", + "scripts": [], + "own_tokenizer": false }, { "name": "Prai", "iso_1_code": null, "iso_3_code": "prt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1327", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1325", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1320", + "scripts": [], + "own_tokenizer": false }, { "name": "Mlabri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mlabri", "iso_1_code": null, "iso_3_code": "mra", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1329", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1328", + "scripts": [], + "own_tokenizer": false }, { "name": "Xinh Mul", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Phong-Kniang", "iso_1_code": null, "iso_3_code": "pnx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1331", + "scripts": [], + "own_tokenizer": false }, { "name": "Puoc", "iso_1_code": null, "iso_3_code": "puo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1332", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1330", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1317", + "scripts": [], + "own_tokenizer": false }, { "name": "Mang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mang", "iso_1_code": null, "iso_3_code": "zng", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1334", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1333", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Angkuic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hu", "iso_1_code": null, "iso_3_code": "huo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1338", + "scripts": [], + "own_tokenizer": false }, { "name": "Kon Keu", "iso_1_code": null, "iso_3_code": "kkn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1339", + "scripts": [], + "own_tokenizer": false }, { "name": "Man Met", "iso_1_code": null, "iso_3_code": "mml", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1340", + "scripts": [], + "own_tokenizer": false }, { "name": "Mok", "iso_1_code": null, "iso_3_code": "mqt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1341", + "scripts": [], + "own_tokenizer": false }, { "name": "Samtao", "iso_1_code": null, "iso_3_code": "stu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1342", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Loi", "iso_1_code": null, "iso_3_code": "tlq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1343", + "scripts": [], + "own_tokenizer": false }, { "name": "Muak Sa-aak", "iso_1_code": null, "iso_3_code": "ukk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1344", + "scripts": [], + "own_tokenizer": false }, { "name": "U", "iso_1_code": null, "iso_3_code": "uuu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1345", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiorr", "iso_1_code": null, "iso_3_code": "xko", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1346", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1337", + "scripts": [], + "own_tokenizer": false }, { "name": "Bit-Khang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bit", "iso_1_code": null, "iso_3_code": "bgk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1348", + "scripts": [], + "own_tokenizer": false }, { "name": "Bumang", "iso_1_code": null, "iso_3_code": "bvp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1349", + "scripts": [], + "own_tokenizer": false }, { "name": "Kh\u00e1ng", "iso_1_code": null, "iso_3_code": "kjm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1350", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1347", + "scripts": [], + "own_tokenizer": false }, { "name": "Lametic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Con", "iso_1_code": null, "iso_3_code": "cno", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1352", + "scripts": [], + "own_tokenizer": false }, { "name": "Rmeet", "iso_1_code": null, "iso_3_code": "lbn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1353", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1351", + "scripts": [], + "own_tokenizer": false }, { "name": "Waic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bulang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Blang", "iso_1_code": null, "iso_3_code": "blr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1356", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1355", + "scripts": [], + "own_tokenizer": false }, { "name": "Lawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lawa, Western", "iso_1_code": null, "iso_3_code": "lcp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1358", + "scripts": [ + "Thai" + ], + "own_tokenizer": false }, { "name": "Lawa, Eastern", "iso_1_code": null, "iso_3_code": "lwl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1359", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1357", + "scripts": [], + "own_tokenizer": false }, { "name": "Wa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wa, Parauk", "iso_1_code": null, "iso_3_code": "prk", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1361", + "scripts": [], + "own_tokenizer": false }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "vwa", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1362", + "scripts": [], + "own_tokenizer": false }, { "name": "Wa, Vo", "iso_1_code": null, "iso_3_code": "wbm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1363", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1360", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1336", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": "dnu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1366", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1365", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Palaung, Ruching", "iso_1_code": null, "iso_3_code": "pce", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1368", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung, Shwe", "iso_1_code": null, "iso_3_code": "pll", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1369", + "scripts": [], + "own_tokenizer": false }, { "name": "Palaung, Rumai", "iso_1_code": null, "iso_3_code": "rbb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1370", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1367", + "scripts": [], + "own_tokenizer": false }, { "name": "Riang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Riang Lang", "iso_1_code": null, "iso_3_code": "ril", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1372", + "scripts": [], + "own_tokenizer": false }, { "name": "Riang Lai", "iso_1_code": null, "iso_3_code": "yin", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1373", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1371", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1335", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1311", + "scripts": [], + "own_tokenizer": false }, { "name": "Palyu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bugan", "iso_1_code": null, "iso_3_code": "bbh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1375", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolyu", "iso_1_code": null, "iso_3_code": "ply", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1376", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1374", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyahkur", "iso_1_code": null, "iso_3_code": "cbn", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1378", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1377", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kemiehua", "iso_1_code": null, "iso_3_code": "kfj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1380", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuanhua", "iso_1_code": null, "iso_3_code": "xnh", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1379", + "scripts": [], + "own_tokenizer": false }, { "name": "Viet-Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Chut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arem", "iso_1_code": null, "iso_3_code": "aem", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1384", + "scripts": [], + "own_tokenizer": false }, { "name": "Maleng", "iso_1_code": null, "iso_3_code": "pkt", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1385", + "scripts": [], + "own_tokenizer": false }, { "name": "Chut", "iso_1_code": null, "iso_3_code": "scb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1386", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1383", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuoi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hung", "iso_1_code": null, "iso_3_code": "hnu", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1388", + "scripts": [], + "own_tokenizer": false }, { "name": "Tho", "iso_1_code": null, "iso_3_code": "tou", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1389", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1387", + "scripts": [], + "own_tokenizer": false }, { "name": "Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bo", "iso_1_code": null, "iso_3_code": "bgl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1391", + "scripts": [], + "own_tokenizer": false }, { "name": "Muong", "iso_1_code": null, "iso_3_code": "mtq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1392", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngu\u00f4n", "iso_1_code": null, "iso_3_code": "nuo", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1390", + "scripts": [], + "own_tokenizer": false }, { "name": "Thavung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aheu", "iso_1_code": null, "iso_3_code": "thm", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1395", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1394", + "scripts": [], + "own_tokenizer": false }, { "name": "Vietnamese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Vietnamese", "iso_1_code": "vi", "iso_3_code": "vie", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "1397", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "1396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1382", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1164", + "scripts": [], + "own_tokenizer": false }, { "name": "Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kherwari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Agariya", "iso_1_code": null, "iso_3_code": "agi", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1401", + "scripts": [], + "own_tokenizer": false }, { "name": "Bijori", "iso_1_code": null, "iso_3_code": "bix", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1402", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodaku", "iso_1_code": null, "iso_3_code": "ksz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1403", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Asuri", "iso_1_code": null, "iso_3_code": "asr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1405", + "scripts": [], + "own_tokenizer": false }, { "name": "Birhor", "iso_1_code": null, "iso_3_code": "biy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1406", + "scripts": [], + "own_tokenizer": false }, { "name": "Koda", "iso_1_code": null, "iso_3_code": "cdz", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1407", + "scripts": [], + "own_tokenizer": false }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "ekl", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1408", + "scripts": [], + "own_tokenizer": false }, { "name": "Ho", "iso_1_code": null, "iso_3_code": "hoc", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1409", + "scripts": [ + "Latn", + "Wara" + ], + "own_tokenizer": false }, { "name": "Korwa", "iso_1_code": null, "iso_3_code": "kfp", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1410", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": "unr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1411", + "scripts": [], + "own_tokenizer": false }, { "name": "Munda", "iso_1_code": null, "iso_3_code": "unx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1412", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1404", + "scripts": [], + "own_tokenizer": false }, { "name": "Santali", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mahali", "iso_1_code": null, "iso_3_code": "mjx", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1414", + "scripts": [], + "own_tokenizer": false }, { "name": "Santhali", "iso_1_code": null, "iso_3_code": "sat", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1415", + "scripts": [ + "Latn", + "Olck" + ], + "own_tokenizer": false }, { "name": "Turi", "iso_1_code": null, "iso_3_code": "trd", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1416", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1400", + "scripts": [], + "own_tokenizer": false }, { "name": "Korku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Korku", "iso_1_code": null, "iso_3_code": "kfq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1417", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1399", + "scripts": [], + "own_tokenizer": false }, { "name": "South Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kharia-Juang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Juang", "iso_1_code": null, "iso_3_code": "jun", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1421", + "scripts": [ + "Orya" + ], + "own_tokenizer": false }, { "name": "Kharia", "iso_1_code": null, "iso_3_code": "khr", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1422", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1420", + "scripts": [], + "own_tokenizer": false }, { "name": "Koraput Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gutob-Remo-Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gata\u2019", "iso_1_code": null, "iso_3_code": "gaq", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1426", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1425", + "scripts": [], + "own_tokenizer": false }, { "name": "Gutob-Remo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bondo", "iso_1_code": null, "iso_3_code": "bfw", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1428", + "scripts": [], + "own_tokenizer": false }, { "name": "Gadaba, Bodo", "iso_1_code": null, "iso_3_code": "gbj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1429", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1427", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1424", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora-Juray-Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Parenga", "iso_1_code": null, "iso_3_code": "pcj", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1431", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora-Juray", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Juray", "iso_1_code": null, "iso_3_code": "juy", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1434", + "scripts": [], + "own_tokenizer": false }, { "name": "Sora", "iso_1_code": null, "iso_3_code": "srb", - "tokenizer": { - "name": "vietnamese", - "tokenizer": "SpaCyTokenizer(\"vi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1433", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1163", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Austronesian.json b/data/Austronesian.json index 795159d71a29e3bd3a13daffa374b6da79e1b456..477a92a6a242890d3c7300f5ee5695c424fa8e96 100644 --- a/data/Austronesian.json +++ b/data/Austronesian.json @@ -2,20569 +2,45036 @@ "name": "Austronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Atayalic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Atayal", "iso_1_code": null, "iso_3_code": "tay", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1438", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sediq", "iso_1_code": null, "iso_3_code": "trv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1439", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1437", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bunun", "iso_1_code": null, "iso_3_code": "bnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1441", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1440", + "scripts": [], + "own_tokenizer": false }, { "name": "East Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amis", "iso_1_code": null, "iso_3_code": "ami", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1444", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sakizaya", "iso_1_code": null, "iso_3_code": "szy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1445", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1443", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Basay", "iso_1_code": null, "iso_3_code": "byq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1447", + "scripts": [], + "own_tokenizer": false }, { "name": "Kavalan", "iso_1_code": null, "iso_3_code": "ckv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1448", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1446", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Siraya", "iso_1_code": null, "iso_3_code": "fos", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1450", + "scripts": [], + "own_tokenizer": false }, { "name": "Taivoan", "iso_1_code": null, "iso_3_code": "tvx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1451", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1449", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1442", + "scripts": [], + "own_tokenizer": false }, { "name": "Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Agta, Villa Viciosa", "iso_1_code": null, "iso_3_code": "dyg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1453", + "scripts": [], + "own_tokenizer": false }, { "name": "Bali-Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "ban", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1455", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sasak", "iso_1_code": null, "iso_3_code": "sas", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1457", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sumbawa", "iso_1_code": null, "iso_3_code": "smw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1458", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1456", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1454", + "scripts": [], + "own_tokenizer": false }, { "name": "Bashiic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ivatan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ibatan", "iso_1_code": null, "iso_3_code": "ivb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1461", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ivatan", "iso_1_code": null, "iso_3_code": "ivv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1462", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1460", + "scripts": [], + "own_tokenizer": false }, { "name": "Yami", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yami", "iso_1_code": null, "iso_3_code": "tao", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1464", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1463", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1459", + "scripts": [], + "own_tokenizer": false }, { "name": "Bilic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bagobo-Klata", "iso_1_code": null, "iso_3_code": "bgi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1466", + "scripts": [], + "own_tokenizer": false }, { "name": "Teduray", "iso_1_code": null, "iso_3_code": "tiy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1467", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Blaan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Blaan, Koronadal", "iso_1_code": null, "iso_3_code": "bpr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1469", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Blaan, Sarangani", "iso_1_code": null, "iso_3_code": "bps", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1470", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1468", + "scripts": [], + "own_tokenizer": false }, { "name": "Tboli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tboli", "iso_1_code": null, "iso_3_code": "tbl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1472", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1471", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1465", + "scripts": [], + "own_tokenizer": false }, { "name": "Celebic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Saluan-Banggai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Banggai", "iso_1_code": null, "iso_3_code": "bgz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1477", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Balantak", "iso_1_code": null, "iso_3_code": "blz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1478", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1476", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Andio", "iso_1_code": null, "iso_3_code": "bzb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1480", + "scripts": [], + "own_tokenizer": false }, { "name": "Saluanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bobongko", "iso_1_code": null, "iso_3_code": "bgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1482", + "scripts": [], + "own_tokenizer": false }, { "name": "Saluan", "iso_1_code": null, "iso_3_code": "loe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1483", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Batui", "iso_1_code": null, "iso_3_code": "zbt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1484", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1481", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1475", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bungku-Tolaki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bungku", "iso_1_code": null, "iso_3_code": "bkz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1489", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahonsuai", "iso_1_code": null, "iso_3_code": "bsu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1490", + "scripts": [], + "own_tokenizer": false }, { "name": "Wawonii", "iso_1_code": null, "iso_3_code": "wow", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1491", + "scripts": [], + "own_tokenizer": false }, { "name": "Mori Bawah", "iso_1_code": null, "iso_3_code": "xmz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1492", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Taloki", "iso_1_code": null, "iso_3_code": "tlk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1494", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": "vkl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1495", + "scripts": [], + "own_tokenizer": false }, { "name": "Koroni", "iso_1_code": null, "iso_3_code": "xkq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1496", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1488", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Moronene", "iso_1_code": null, "iso_3_code": "mqn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1498", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1497", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1487", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mori Atas", "iso_1_code": null, "iso_3_code": "mzq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1501", + "scripts": [], + "own_tokenizer": false }, { "name": "Padoe", "iso_1_code": null, "iso_3_code": "pdo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1502", + "scripts": [], + "own_tokenizer": false }, { "name": "Tomadino", "iso_1_code": null, "iso_3_code": "tdi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1500", + "scripts": [], + "own_tokenizer": false }, { "name": "West Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tolaki", "iso_1_code": null, "iso_3_code": "lbw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1505", + "scripts": [], + "own_tokenizer": false }, { "name": "Rahambuu", "iso_1_code": null, "iso_3_code": "raz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1506", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodeoha", "iso_1_code": null, "iso_3_code": "vko", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1507", + "scripts": [], + "own_tokenizer": false }, { "name": "Waru", "iso_1_code": null, "iso_3_code": "wru", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1508", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1504", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1499", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1486", + "scripts": [], + "own_tokenizer": false }, { "name": "Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuclear Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lasalimu", "iso_1_code": null, "iso_3_code": "llm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1513", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumbewaha", "iso_1_code": null, "iso_3_code": "xks", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1514", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1512", + "scripts": [], + "own_tokenizer": false }, { "name": "West Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cia-Cia", "iso_1_code": null, "iso_3_code": "cia", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1516", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1515", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1511", + "scripts": [], + "own_tokenizer": false }, { "name": "Munan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Busoa", "iso_1_code": null, "iso_3_code": "bup", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1518", + "scripts": [], + "own_tokenizer": false }, { "name": "Munic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kaimbulawa", "iso_1_code": null, "iso_3_code": "zka", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1520", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Liabuku", "iso_1_code": null, "iso_3_code": "lix", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1522", + "scripts": [], + "own_tokenizer": false }, { "name": "Muna", "iso_1_code": null, "iso_3_code": "mnb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1523", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pancana", "iso_1_code": null, "iso_3_code": "pnp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1524", + "scripts": [], + "own_tokenizer": false }, { "name": "Kioko", "iso_1_code": null, "iso_3_code": "ues", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1525", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1519", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1510", + "scripts": [], + "own_tokenizer": false }, { "name": "Tukangbesi-Bonerate", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tukang Besi South", "iso_1_code": null, "iso_3_code": "bhq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1527", + "scripts": [], + "own_tokenizer": false }, { "name": "Bonerate", "iso_1_code": null, "iso_3_code": "bna", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1528", + "scripts": [], + "own_tokenizer": false }, { "name": "Tukang Besi North", "iso_1_code": null, "iso_3_code": "khc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1529", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1526", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1509", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1485", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1474", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaili-Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kaili", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baras", "iso_1_code": null, "iso_3_code": "brs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1533", + "scripts": [], + "own_tokenizer": false }, { "name": "Tado", "iso_1_code": null, "iso_3_code": "klw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1534", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaili, Da\u2019a", "iso_1_code": null, "iso_3_code": "kzf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1535", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaili, Ledo", "iso_1_code": null, "iso_3_code": "lew", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1536", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moma", "iso_1_code": null, "iso_3_code": "myl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1537", + "scripts": [], + "own_tokenizer": false }, { "name": "Topoiyo", "iso_1_code": null, "iso_3_code": "toy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1538", + "scripts": [], + "own_tokenizer": false }, { "name": "Sedoa", "iso_1_code": null, "iso_3_code": "tvw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1539", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaili, Unde", "iso_1_code": null, "iso_3_code": "unz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1540", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1532", + "scripts": [], + "own_tokenizer": false }, { "name": "Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pamona", "iso_1_code": null, "iso_3_code": "pmf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1542", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tombelala", "iso_1_code": null, "iso_3_code": "ttp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1543", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1541", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1531", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rampi", "iso_1_code": null, "iso_3_code": "lje", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1545", + "scripts": [], + "own_tokenizer": false }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "ppk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1546", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sarudu", "iso_1_code": null, "iso_3_code": "sdu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1547", + "scripts": [], + "own_tokenizer": false }, { "name": "Badaic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Behoa", "iso_1_code": null, "iso_3_code": "bep", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1549", + "scripts": [], + "own_tokenizer": false }, { "name": "Bada", "iso_1_code": null, "iso_3_code": "bhz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1550", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Napu", "iso_1_code": null, "iso_3_code": "npy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1551", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1548", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1544", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1530", + "scripts": [], + "own_tokenizer": false }, { "name": "Tomini-Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1554", + "scripts": [], + "own_tokenizer": false }, { "name": "Totoli", "iso_1_code": null, "iso_3_code": "txe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1555", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1553", + "scripts": [], + "own_tokenizer": false }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dondo", "iso_1_code": null, "iso_3_code": "dok", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1558", + "scripts": [], + "own_tokenizer": false }, { "name": "Lauje", "iso_1_code": null, "iso_3_code": "law", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1559", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": "txm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1560", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1557", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Balaesang", "iso_1_code": null, "iso_3_code": "bls", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1562", + "scripts": [], + "own_tokenizer": false }, { "name": "Dampelas", "iso_1_code": null, "iso_3_code": "dms", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1563", + "scripts": [], + "own_tokenizer": false }, { "name": "Taje", "iso_1_code": null, "iso_3_code": "pee", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1564", + "scripts": [], + "own_tokenizer": false }, { "name": "Tajio", "iso_1_code": null, "iso_3_code": "tdj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1565", + "scripts": [], + "own_tokenizer": false }, { "name": "Pendau", "iso_1_code": null, "iso_3_code": "ums", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1566", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1556", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1552", + "scripts": [], + "own_tokenizer": false }, { "name": "Wotu-Wolio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Wotu", "iso_1_code": null, "iso_3_code": "wtw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1568", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kalao", "iso_1_code": null, "iso_3_code": "kly", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1570", + "scripts": [], + "own_tokenizer": false }, { "name": "Laiyolo", "iso_1_code": null, "iso_3_code": "lji", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1571", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1569", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolio-Kamaru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kamaru", "iso_1_code": null, "iso_3_code": "kgx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1573", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolio", "iso_1_code": null, "iso_3_code": "wlo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1574", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1572", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1567", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1473", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pampangan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kapampangan", "iso_1_code": null, "iso_3_code": "pam", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1577", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1576", + "scripts": [], + "own_tokenizer": false }, { "name": "Sambalic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [ + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } + }, + "children": [ { "name": "Ayta, Ambala", "iso_1_code": null, "iso_3_code": "abc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1579", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayta, Abellen", "iso_1_code": null, "iso_3_code": "abp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1580", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayta, Magbukun", "iso_1_code": null, "iso_3_code": "ayt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1581", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayta, Mag-Indi", "iso_1_code": null, "iso_3_code": "blx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1582", + "scripts": [], + "own_tokenizer": false }, { "name": "Sambal, Botolan", "iso_1_code": null, "iso_3_code": "sbl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1583", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ayta, Mag-antsi", "iso_1_code": null, "iso_3_code": "sgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1584", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bolinao", "iso_1_code": null, "iso_3_code": "smk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1585", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sambal", "iso_1_code": null, "iso_3_code": "xsb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1586", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1578", + "scripts": [], + "own_tokenizer": false }, { "name": "Sinauna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dumagat, Remontado", "iso_1_code": null, "iso_3_code": "agv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1588", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1587", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1575", + "scripts": [], + "own_tokenizer": false }, { "name": "Central-Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Barakai", "iso_1_code": null, "iso_3_code": "baj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1591", + "scripts": [], + "own_tokenizer": false }, { "name": "Gwatlelir", "iso_1_code": null, "iso_3_code": "bay", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1592", + "scripts": [], + "own_tokenizer": false }, { "name": "Koba", "iso_1_code": null, "iso_3_code": "kpd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1593", + "scripts": [], + "own_tokenizer": false }, { "name": "Dobel", "iso_1_code": null, "iso_3_code": "kvo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1594", + "scripts": [], + "own_tokenizer": false }, { "name": "Kompane", "iso_1_code": null, "iso_3_code": "kvp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1595", + "scripts": [], + "own_tokenizer": false }, { "name": "Kola", "iso_1_code": null, "iso_3_code": "kvv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1596", + "scripts": [], + "own_tokenizer": false }, { "name": "Karey", "iso_1_code": null, "iso_3_code": "kyd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1597", + "scripts": [], + "own_tokenizer": false }, { "name": "Lola", "iso_1_code": null, "iso_3_code": "lcd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1598", + "scripts": [], + "own_tokenizer": false }, { "name": "Lorang", "iso_1_code": null, "iso_3_code": "lrn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1599", + "scripts": [], + "own_tokenizer": false }, { "name": "Mariri", "iso_1_code": null, "iso_3_code": "mqi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1600", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarangan, East", "iso_1_code": null, "iso_3_code": "tre", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1601", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarangan, West", "iso_1_code": null, "iso_3_code": "txn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1602", + "scripts": [], + "own_tokenizer": false }, { "name": "Ujir", "iso_1_code": null, "iso_3_code": "udj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1603", + "scripts": [], + "own_tokenizer": false }, { "name": "Manombai", "iso_1_code": null, "iso_3_code": "woo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1604", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1590", + "scripts": [], + "own_tokenizer": false }, { "name": "Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Babar, North", "iso_1_code": null, "iso_3_code": "bcd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1607", + "scripts": [], + "own_tokenizer": false }, { "name": "Dawera-Daweloor", "iso_1_code": null, "iso_3_code": "ddw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1608", + "scripts": [], + "own_tokenizer": false }, { "name": "Dai", "iso_1_code": null, "iso_3_code": "dij", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1609", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1606", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Masela-South Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Masela, West", "iso_1_code": null, "iso_3_code": "mss", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1612", + "scripts": [], + "own_tokenizer": false }, { "name": "Masela, Central", "iso_1_code": null, "iso_3_code": "mxz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1613", + "scripts": [], + "own_tokenizer": false }, { "name": "Serili", "iso_1_code": null, "iso_3_code": "sve", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1614", + "scripts": [], + "own_tokenizer": false }, { "name": "Babar, Southeast", "iso_1_code": null, "iso_3_code": "vbb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1615", + "scripts": [], + "own_tokenizer": false }, { "name": "Masela, East", "iso_1_code": null, "iso_3_code": "vme", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1616", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1611", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Emplawas", "iso_1_code": null, "iso_3_code": "emw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1618", + "scripts": [], + "own_tokenizer": false }, { "name": "Imroing", "iso_1_code": null, "iso_3_code": "imr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1619", + "scripts": [], + "own_tokenizer": false }, { "name": "Tela-Masbuar", "iso_1_code": null, "iso_3_code": "tvm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1620", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1617", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1610", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1605", + "scripts": [], + "own_tokenizer": false }, { "name": "Bima-Lembata", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Adonara", "iso_1_code": null, "iso_3_code": "adr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1622", + "scripts": [], + "own_tokenizer": false }, { "name": "Alor", "iso_1_code": null, "iso_3_code": "aol", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1623", + "scripts": [], + "own_tokenizer": false }, { "name": "Bima", "iso_1_code": null, "iso_3_code": "bhp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1624", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ende", "iso_1_code": null, "iso_3_code": "end", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1625", + "scripts": [], + "own_tokenizer": false }, { "name": "Ile Ape", "iso_1_code": null, "iso_3_code": "ila", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1626", + "scripts": [], + "own_tokenizer": false }, { "name": "Kedang", "iso_1_code": null, "iso_3_code": "ksx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1627", + "scripts": [], + "own_tokenizer": false }, { "name": "Kepo\u2019", "iso_1_code": null, "iso_3_code": "kuk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1628", + "scripts": [], + "own_tokenizer": false }, { "name": "Komodo", "iso_1_code": null, "iso_3_code": "kvh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1629", + "scripts": [], + "own_tokenizer": false }, { "name": "Li\u2019o", "iso_1_code": null, "iso_3_code": "ljl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1630", + "scripts": [], + "own_tokenizer": false }, { "name": "Painara", "iso_1_code": null, "iso_3_code": "lmf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1631", + "scripts": [], + "own_tokenizer": false }, { "name": "Labalekan-Mingar", "iso_1_code": null, "iso_3_code": "lmj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1632", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamatuka", "iso_1_code": null, "iso_3_code": "lmq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1633", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamalera", "iso_1_code": null, "iso_3_code": "lmr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1634", + "scripts": [], + "own_tokenizer": false }, { "name": "Levuka", "iso_1_code": null, "iso_3_code": "lvu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1635", + "scripts": [], + "own_tokenizer": false }, { "name": "Lewoeleng", "iso_1_code": null, "iso_3_code": "lwe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1636", + "scripts": [], + "own_tokenizer": false }, { "name": "Lewotobi", "iso_1_code": null, "iso_3_code": "lwt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1637", + "scripts": [], + "own_tokenizer": false }, { "name": "Manggarai", "iso_1_code": null, "iso_3_code": "mqy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1638", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngad\u2019a, Eastern", "iso_1_code": null, "iso_3_code": "nea", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1639", + "scripts": [], + "own_tokenizer": false }, { "name": "Nage", "iso_1_code": null, "iso_3_code": "nxe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1640", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngad\u2019a", "iso_1_code": null, "iso_3_code": "nxg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1641", + "scripts": [], + "own_tokenizer": false }, { "name": "Palu\u2019e", "iso_1_code": null, "iso_3_code": "ple", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1642", + "scripts": [], + "own_tokenizer": false }, { "name": "Rembong", "iso_1_code": null, "iso_3_code": "reb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1643", + "scripts": [], + "own_tokenizer": false }, { "name": "Riung", "iso_1_code": null, "iso_3_code": "riu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1644", + "scripts": [], + "own_tokenizer": false }, { "name": "Rajong", "iso_1_code": null, "iso_3_code": "rjg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1645", + "scripts": [], + "own_tokenizer": false }, { "name": "Rongga", "iso_1_code": null, "iso_3_code": "ror", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1646", + "scripts": [], + "own_tokenizer": false }, { "name": "Sika", "iso_1_code": null, "iso_3_code": "ski", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1647", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamaholot", "iso_1_code": null, "iso_3_code": "slp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1648", + "scripts": [], + "own_tokenizer": false }, { "name": "So\u2019a", "iso_1_code": null, "iso_3_code": "ssq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1649", + "scripts": [], + "own_tokenizer": false }, { "name": "Wae Rana", "iso_1_code": null, "iso_3_code": "wrx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1650", + "scripts": [], + "own_tokenizer": false }, { "name": "Ke\u2019o", "iso_1_code": null, "iso_3_code": "xxk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1651", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1621", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": "amv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1654", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1653", + "scripts": [], + "own_tokenizer": false }, { "name": "Buru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lisela", "iso_1_code": null, "iso_3_code": "lcl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1656", + "scripts": [], + "own_tokenizer": false }, { "name": "Buru", "iso_1_code": null, "iso_3_code": "mhs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1657", + "scripts": [], + "own_tokenizer": false }, { "name": "Moksela", "iso_1_code": null, "iso_3_code": "vms", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1658", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1655", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manipa", "iso_1_code": null, "iso_3_code": "mqp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1660", + "scripts": [], + "own_tokenizer": false }, { "name": "Banda-Geser", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Banda", "iso_1_code": null, "iso_3_code": "bnd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1662", + "scripts": [], + "own_tokenizer": false }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bati", "iso_1_code": null, "iso_3_code": "bvt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1664", + "scripts": [], + "own_tokenizer": false }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": "ges", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1665", + "scripts": [], + "own_tokenizer": false }, { "name": "Watubela", "iso_1_code": null, "iso_3_code": "wah", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1666", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1663", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1661", + "scripts": [], + "own_tokenizer": false }, { "name": "Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": "bty", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1669", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1668", + "scripts": [], + "own_tokenizer": false }, { "name": "East Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hoti", "iso_1_code": null, "iso_3_code": "hti", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1671", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1670", + "scripts": [], + "own_tokenizer": false }, { "name": "Manusela-Seti", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Benggoi", "iso_1_code": null, "iso_3_code": "bgy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1673", + "scripts": [], + "own_tokenizer": false }, { "name": "Huaulu", "iso_1_code": null, "iso_3_code": "hud", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1674", + "scripts": [], + "own_tokenizer": false }, { "name": "Salas", "iso_1_code": null, "iso_3_code": "sgu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1675", + "scripts": [], + "own_tokenizer": false }, { "name": "Liana-Seti", "iso_1_code": null, "iso_3_code": "ste", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1676", + "scripts": [], + "own_tokenizer": false }, { "name": "Sou Upaa", "iso_1_code": null, "iso_3_code": "wha", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1677", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1672", + "scripts": [], + "own_tokenizer": false }, { "name": "Masiwang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Masiwang", "iso_1_code": null, "iso_3_code": "bnf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1679", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1678", + "scripts": [], + "own_tokenizer": false }, { "name": "Nunusaku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": "kzl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1682", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1681", + "scripts": [], + "own_tokenizer": false }, { "name": "Piru Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Haruku", "iso_1_code": null, "iso_3_code": "hrk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1684", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kaibobo", "iso_1_code": null, "iso_3_code": "kzb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1686", + "scripts": [], + "own_tokenizer": false }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1687", + "scripts": [], + "own_tokenizer": false }, { "name": "Sou Nama", "iso_1_code": null, "iso_3_code": "tlt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1688", + "scripts": [], + "own_tokenizer": false }, { "name": "Seram Straits", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ambon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hitu", "iso_1_code": null, "iso_3_code": "htu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1691", + "scripts": [], + "own_tokenizer": false }, { "name": "Laha", "iso_1_code": null, "iso_3_code": "lhh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1692", + "scripts": [], + "own_tokenizer": false }, { "name": "Tulehu", "iso_1_code": null, "iso_3_code": "tlu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1693", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1690", + "scripts": [], + "own_tokenizer": false }, { "name": "Solehua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Paulohi", "iso_1_code": null, "iso_3_code": "plh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1695", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1694", + "scripts": [], + "own_tokenizer": false }, { "name": "Uliase", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hatuhaha", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Elpaputi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amahai", "iso_1_code": null, "iso_3_code": "amq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1699", + "scripts": [], + "own_tokenizer": false }, { "name": "Nusa Laut", "iso_1_code": null, "iso_3_code": "nul", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1700", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1698", + "scripts": [], + "own_tokenizer": false }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Latu", "iso_1_code": null, "iso_3_code": "ltu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1702", + "scripts": [], + "own_tokenizer": false }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": "spr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1703", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1701", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1697", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamarian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kamarian", "iso_1_code": null, "iso_3_code": "kzx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1705", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1704", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1696", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1689", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1685", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": "asl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1708", + "scripts": [], + "own_tokenizer": false }, { "name": "Seit-Kaitetu", "iso_1_code": null, "iso_3_code": "hik", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1709", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1707", + "scripts": [], + "own_tokenizer": false }, { "name": "Hoamoal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Larike-Wakasihu", "iso_1_code": null, "iso_3_code": "alo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1711", + "scripts": [], + "own_tokenizer": false }, { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1712", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1710", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1706", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1683", + "scripts": [], + "own_tokenizer": false }, { "name": "Three Rivers", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yalahatan", "iso_1_code": null, "iso_3_code": "jal", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1714", + "scripts": [], + "own_tokenizer": false }, { "name": "Amalumute", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northwest Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Luhu", "iso_1_code": null, "iso_3_code": "lcq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1717", + "scripts": [], + "own_tokenizer": false }, { "name": "Lisabata-Nuniali", "iso_1_code": null, "iso_3_code": "lcs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1718", + "scripts": [], + "own_tokenizer": false }, { "name": "Hulung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hulung", "iso_1_code": null, "iso_3_code": "huk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1720", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1719", + "scripts": [], + "own_tokenizer": false }, { "name": "Loun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Loun", "iso_1_code": null, "iso_3_code": "lox", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1722", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1721", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulat Inai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Alune", "iso_1_code": null, "iso_3_code": "alp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1724", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naka\u2019ela", "iso_1_code": null, "iso_3_code": "nae", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1725", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1723", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1716", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1715", + "scripts": [], + "own_tokenizer": false }, { "name": "Wemale", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Wemale", "iso_1_code": null, "iso_3_code": "weo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1727", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1726", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1713", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1680", + "scripts": [], + "own_tokenizer": false }, { "name": "Sawai-Nuaulu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuaulu, North", "iso_1_code": null, "iso_3_code": "nni", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1729", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuaulu, South", "iso_1_code": null, "iso_3_code": "nxl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1730", + "scripts": [], + "own_tokenizer": false }, { "name": "Saleman", "iso_1_code": null, "iso_3_code": "sau", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1731", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1728", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1667", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1659", + "scripts": [], + "own_tokenizer": false }, { "name": "Sula", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mangole", "iso_1_code": null, "iso_3_code": "mqc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1733", + "scripts": [], + "own_tokenizer": false }, { "name": "Sula", "iso_1_code": null, "iso_3_code": "szn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1734", + "scripts": [], + "own_tokenizer": false }, { "name": "Taliabo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kadai", "iso_1_code": null, "iso_3_code": "kzd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1736", + "scripts": [], + "own_tokenizer": false }, { "name": "Taliabu", "iso_1_code": null, "iso_3_code": "tlv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1737", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1735", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1732", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1652", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [ + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } + }, + "children": [ { "name": "Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Admiralty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manus", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Andra-Hus", "iso_1_code": null, "iso_3_code": "anx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1744", + "scripts": [], + "own_tokenizer": false }, { "name": "Elu", "iso_1_code": null, "iso_3_code": "elu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1745", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurti", "iso_1_code": null, "iso_3_code": "ktm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1746", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "kxr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1747", + "scripts": [], + "own_tokenizer": false }, { "name": "Leipon", "iso_1_code": null, "iso_3_code": "lek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1748", + "scripts": [], + "own_tokenizer": false }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "lle", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1749", + "scripts": [], + "own_tokenizer": false }, { "name": "Ponam", "iso_1_code": null, "iso_3_code": "ncc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1750", + "scripts": [], + "own_tokenizer": false }, { "name": "Nali", "iso_1_code": null, "iso_3_code": "nss", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1751", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kele", "iso_1_code": null, "iso_3_code": "sbc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1752", + "scripts": [], + "own_tokenizer": false }, { "name": "Titan", "iso_1_code": null, "iso_3_code": "ttv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1753", + "scripts": [], + "own_tokenizer": false }, { "name": "Ere", "iso_1_code": null, "iso_3_code": "twp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1743", + "scripts": [], + "own_tokenizer": false }, { "name": "Mokoreng-Loniu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Loniu", "iso_1_code": null, "iso_3_code": "los", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1756", + "scripts": [], + "own_tokenizer": false }, { "name": "Idio", "iso_1_code": null, "iso_3_code": "mft", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1757", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1755", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bipi", "iso_1_code": null, "iso_3_code": "biq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1759", + "scripts": [], + "own_tokenizer": false }, { "name": "Likum", "iso_1_code": null, "iso_3_code": "lib", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1760", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyindrou", "iso_1_code": null, "iso_3_code": "lid", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1761", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hermit", "iso_1_code": null, "iso_3_code": "llf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1762", + "scripts": [], + "own_tokenizer": false }, { "name": "Mondropolon", "iso_1_code": null, "iso_3_code": "npn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1763", + "scripts": [], + "own_tokenizer": false }, { "name": "Tulu-Bohuai", "iso_1_code": null, "iso_3_code": "rak", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1764", + "scripts": [], + "own_tokenizer": false }, { "name": "Sori-Harengan", "iso_1_code": null, "iso_3_code": "sbh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1765", + "scripts": [], + "own_tokenizer": false }, { "name": "Khehek", "iso_1_code": null, "iso_3_code": "tlx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1766", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1758", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1742", + "scripts": [], + "own_tokenizer": false }, { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": "pkg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1768", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1767", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Paluai", "iso_1_code": null, "iso_3_code": "blq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1770", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenkau", "iso_1_code": null, "iso_3_code": "ler", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1771", + "scripts": [], + "own_tokenizer": false }, { "name": "Lou", "iso_1_code": null, "iso_3_code": "loj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1772", + "scripts": [], + "own_tokenizer": false }, { "name": "Neherneh", "iso_1_code": null, "iso_3_code": "ncn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1773", + "scripts": [], + "own_tokenizer": false }, { "name": "Penchal", "iso_1_code": null, "iso_3_code": "pek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1774", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1769", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1741", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kaniet", "iso_1_code": null, "iso_3_code": "ktk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1776", + "scripts": [], + "own_tokenizer": false }, { "name": "Seimat", "iso_1_code": null, "iso_3_code": "ssg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1777", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wuvulu-Aua", "iso_1_code": null, "iso_3_code": "wuv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1778", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1775", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1740", + "scripts": [], + "own_tokenizer": false }, { "name": "Central-Eastern Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Remote Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Fijian-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Fijian", "iso_1_code": "fj", "iso_3_code": "fij", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1784", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gone Dau", "iso_1_code": null, "iso_3_code": "goo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1785", + "scripts": [], + "own_tokenizer": false }, { "name": "Lauan", "iso_1_code": null, "iso_3_code": "llx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1786", + "scripts": [], + "own_tokenizer": false }, { "name": "Lomaiviti", "iso_1_code": null, "iso_3_code": "lmv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1787", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1783", + "scripts": [], + "own_tokenizer": false }, { "name": "Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rapa", "iso_1_code": null, "iso_3_code": "ray", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1792", + "scripts": [], + "own_tokenizer": false }, { "name": "Marquesic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hawaiian", "iso_1_code": null, "iso_3_code": "haw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1794", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Marquesan, South", "iso_1_code": null, "iso_3_code": "mqm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1795", + "scripts": [], + "own_tokenizer": false }, { "name": "Marquesan, North", "iso_1_code": null, "iso_3_code": "mrq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1796", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mangareva", "iso_1_code": null, "iso_3_code": "mrv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1797", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1793", + "scripts": [], + "own_tokenizer": false }, { "name": "Tahitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Austral", "iso_1_code": null, "iso_3_code": "aut", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1799", + "scripts": [], + "own_tokenizer": false }, { "name": "Maori", "iso_1_code": "mi", "iso_3_code": "mri", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1800", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tuamotuan", "iso_1_code": null, "iso_3_code": "pmt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1801", + "scripts": [], + "own_tokenizer": false }, { "name": "Penrhyn", "iso_1_code": null, "iso_3_code": "pnh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1802", + "scripts": [], + "own_tokenizer": false }, { "name": "Cook Islands Maori", "iso_1_code": null, "iso_3_code": "rar", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1803", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Rakahanga-Manihiki", "iso_1_code": null, "iso_3_code": "rkh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1804", + "scripts": [], + "own_tokenizer": false }, { "name": "Moriori", "iso_1_code": null, "iso_3_code": "rrm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1805", + "scripts": [], + "own_tokenizer": false }, { "name": "Tahitian", "iso_1_code": "ty", "iso_3_code": "tah", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1806", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1798", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1791", + "scripts": [], + "own_tokenizer": false }, { "name": "Rapanui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rapa Nui", "iso_1_code": null, "iso_3_code": "rap", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1808", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1807", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1790", + "scripts": [], + "own_tokenizer": false }, { "name": "Samoic-Outlier", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Uvean-Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Niuatoputapu", "iso_1_code": null, "iso_3_code": "nkp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1811", + "scripts": [], + "own_tokenizer": false }, { "name": "Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": "num", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1812", + "scripts": [], + "own_tokenizer": false }, { "name": "Wallisian", "iso_1_code": null, "iso_3_code": "wls", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1813", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1810", + "scripts": [], + "own_tokenizer": false }, { "name": "Ellicean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kapingamarangi", "iso_1_code": null, "iso_3_code": "kpg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1815", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Takuu", "iso_1_code": null, "iso_3_code": "nho", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1816", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nukuoro", "iso_1_code": null, "iso_3_code": "nkr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1817", + "scripts": [], + "own_tokenizer": false }, { "name": "Nukumanu", "iso_1_code": null, "iso_3_code": "nuq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1818", + "scripts": [], + "own_tokenizer": false }, { "name": "Nukeria", "iso_1_code": null, "iso_3_code": "nur", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1819", + "scripts": [], + "own_tokenizer": false }, { "name": "Ontong Java", "iso_1_code": null, "iso_3_code": "ojv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1820", + "scripts": [], + "own_tokenizer": false }, { "name": "Sikaiana", "iso_1_code": null, "iso_3_code": "sky", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1821", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuvaluan", "iso_1_code": null, "iso_3_code": "tvl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1822", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1814", + "scripts": [], + "own_tokenizer": false }, { "name": "Futunic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Anuta", "iso_1_code": null, "iso_3_code": "aud", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1824", + "scripts": [], + "own_tokenizer": false }, { "name": "Futuna, East", "iso_1_code": null, "iso_3_code": "fud", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1825", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Futuna-Aniwa", "iso_1_code": null, "iso_3_code": "fut", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1826", + "scripts": [], + "own_tokenizer": false }, { "name": "Emae", "iso_1_code": null, "iso_3_code": "mmw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1827", + "scripts": [], + "own_tokenizer": false }, { "name": "Rennell-Bellona", "iso_1_code": null, "iso_3_code": "mnv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1828", + "scripts": [], + "own_tokenizer": false }, { "name": "Mele-Fila", "iso_1_code": null, "iso_3_code": "mxe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1829", + "scripts": [], + "own_tokenizer": false }, { "name": "Vaeakau-Taumako", "iso_1_code": null, "iso_3_code": "piv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1830", + "scripts": [], + "own_tokenizer": false }, { "name": "Tikopia", "iso_1_code": null, "iso_3_code": "tkp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1831", + "scripts": [], + "own_tokenizer": false }, { "name": "Fagauvea", "iso_1_code": null, "iso_3_code": "uve", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1832", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1823", + "scripts": [], + "own_tokenizer": false }, { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": "pkp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1834", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1833", + "scripts": [], + "own_tokenizer": false }, { "name": "Samoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Samoan", "iso_1_code": "sm", "iso_3_code": "smo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1836", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1835", + "scripts": [], + "own_tokenizer": false }, { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": "tkl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1838", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1837", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1809", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1789", + "scripts": [], + "own_tokenizer": false }, { "name": "Tongic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Niue", "iso_1_code": null, "iso_3_code": "niu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1840", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tongan", "iso_1_code": "to", "iso_3_code": "ton", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1841", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1839", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1788", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1782", + "scripts": [], + "own_tokenizer": false }, { "name": "West Fijian-Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": "rtm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1844", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1843", + "scripts": [], + "own_tokenizer": false }, { "name": "West Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Namosi-Naitasiri-Serua", "iso_1_code": null, "iso_3_code": "bwb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1846", + "scripts": [], + "own_tokenizer": false }, { "name": "Fijian, Western", "iso_1_code": null, "iso_3_code": "wyy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1847", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1845", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1842", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1781", + "scripts": [], + "own_tokenizer": false }, { "name": "Loyalty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Drehu", "iso_1_code": null, "iso_3_code": "dhv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1849", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iaai", "iso_1_code": null, "iso_3_code": "iai", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1850", + "scripts": [], + "own_tokenizer": false }, { "name": "Nengone", "iso_1_code": null, "iso_3_code": "nen", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1851", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1848", + "scripts": [], + "own_tokenizer": false }, { "name": "Micronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Micronesian Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ikiribati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kiribati", "iso_1_code": null, "iso_3_code": "gil", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1855", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1854", + "scripts": [], + "own_tokenizer": false }, { "name": "Kusaiean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kosraean", "iso_1_code": null, "iso_3_code": "kos", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1857", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1856", + "scripts": [], + "own_tokenizer": false }, { "name": "Marshallese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Marshallese", "iso_1_code": "mh", "iso_3_code": "mah", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1859", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1858", + "scripts": [], + "own_tokenizer": false }, { "name": "Pohnpeic-Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Carolinian", "iso_1_code": null, "iso_3_code": "cal", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1862", + "scripts": [], + "own_tokenizer": false }, { "name": "Chuukese", "iso_1_code": null, "iso_3_code": "chk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1863", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mapia", "iso_1_code": null, "iso_3_code": "mpy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1864", + "scripts": [], + "own_tokenizer": false }, { "name": "Mortlockese", "iso_1_code": null, "iso_3_code": "mrl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1865", + "scripts": [], + "own_tokenizer": false }, { "name": "Namonuito", "iso_1_code": null, "iso_3_code": "nmt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1866", + "scripts": [], + "own_tokenizer": false }, { "name": "P\u00e1\u00e1fang", "iso_1_code": null, "iso_3_code": "pfa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1867", + "scripts": [], + "own_tokenizer": false }, { "name": "Puluwatese", "iso_1_code": null, "iso_3_code": "puw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1868", + "scripts": [], + "own_tokenizer": false }, { "name": "Sonsorolese", "iso_1_code": null, "iso_3_code": "sov", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1869", + "scripts": [], + "own_tokenizer": false }, { "name": "Satawalese", "iso_1_code": null, "iso_3_code": "stw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1870", + "scripts": [], + "own_tokenizer": false }, { "name": "Tobian", "iso_1_code": null, "iso_3_code": "tox", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1871", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanapag", "iso_1_code": null, "iso_3_code": "tpv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1872", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulithian", "iso_1_code": null, "iso_3_code": "uli", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1873", + "scripts": [], + "own_tokenizer": false }, { "name": "Woleaian", "iso_1_code": null, "iso_3_code": "woe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1874", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1861", + "scripts": [], + "own_tokenizer": false }, { "name": "Pohnpeic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mokilese", "iso_1_code": null, "iso_3_code": "mkj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1876", + "scripts": [], + "own_tokenizer": false }, { "name": "Pingelapese", "iso_1_code": null, "iso_3_code": "pif", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1877", + "scripts": [], + "own_tokenizer": false }, { "name": "Pohnpeian", "iso_1_code": null, "iso_3_code": "pon", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1878", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1875", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1860", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1853", + "scripts": [], + "own_tokenizer": false }, { "name": "Nauruan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nauruan", "iso_1_code": "na", "iso_3_code": "nau", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1880", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1879", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1852", + "scripts": [], + "own_tokenizer": false }, { "name": "New Caledonian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Haekic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Haeke", "iso_1_code": null, "iso_3_code": "aek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1883", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1882", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Haveke", "iso_1_code": null, "iso_3_code": "hvk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1885", + "scripts": [], + "own_tokenizer": false }, { "name": "Vamale", "iso_1_code": null, "iso_3_code": "mkt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1886", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cemuh\u00ee", "iso_1_code": null, "iso_3_code": "cam", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1888", + "scripts": [], + "own_tokenizer": false }, { "name": "Paic\u00ee", "iso_1_code": null, "iso_3_code": "pri", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1889", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1887", + "scripts": [], + "own_tokenizer": false }, { "name": "Extreme Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Caac", "iso_1_code": null, "iso_3_code": "msq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1891", + "scripts": [], + "own_tokenizer": false }, { "name": "N\u00eal\u00eamwa-Nixumwak", "iso_1_code": null, "iso_3_code": "nee", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1892", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuanga", "iso_1_code": null, "iso_3_code": "nua", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1893", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyel\u00e2yu", "iso_1_code": null, "iso_3_code": "yly", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1894", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1890", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pwaamei", "iso_1_code": null, "iso_3_code": "pme", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1896", + "scripts": [], + "own_tokenizer": false }, { "name": "Pwapw\u00e2", "iso_1_code": null, "iso_3_code": "pop", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1897", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bwatoo", "iso_1_code": null, "iso_3_code": "bwa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1899", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": "mrk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1900", + "scripts": [], + "own_tokenizer": false }, { "name": "Waamwang", "iso_1_code": null, "iso_3_code": "wmn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1901", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1898", + "scripts": [], + "own_tokenizer": false }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Fw\u00e2i", "iso_1_code": null, "iso_3_code": "fwa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1903", + "scripts": [], + "own_tokenizer": false }, { "name": "Jawe", "iso_1_code": null, "iso_3_code": "jaz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1904", + "scripts": [], + "own_tokenizer": false }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": "nem", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1905", + "scripts": [], + "own_tokenizer": false }, { "name": "Pije", "iso_1_code": null, "iso_3_code": "piz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1906", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1902", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1895", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1884", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Extreme Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Drubea", "iso_1_code": null, "iso_3_code": "duf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1909", + "scripts": [], + "own_tokenizer": false }, { "name": "Num\u00e8\u00e8", "iso_1_code": null, "iso_3_code": "kdk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1910", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1908", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Wailic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aji\u00eb", "iso_1_code": null, "iso_3_code": "aji", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1913", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Arh\u00f6", "iso_1_code": null, "iso_3_code": "aok", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1914", + "scripts": [], + "own_tokenizer": false }, { "name": "Arh\u00e2", "iso_1_code": null, "iso_3_code": "aqr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1915", + "scripts": [], + "own_tokenizer": false }, { "name": "Orowe", "iso_1_code": null, "iso_3_code": "bpk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1916", + "scripts": [], + "own_tokenizer": false }, { "name": "Neku", "iso_1_code": null, "iso_3_code": "nek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1917", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1912", + "scripts": [], + "own_tokenizer": false }, { "name": "Xaracuu-Xaragure", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "X\u00e2r\u00e2c\u00f9\u00f9", "iso_1_code": null, "iso_3_code": "ane", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1919", + "scripts": [], + "own_tokenizer": false }, { "name": "X\u00e2r\u00e2gur\u00e8", "iso_1_code": null, "iso_3_code": "axx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1920", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1918", + "scripts": [], + "own_tokenizer": false }, { "name": "Zire-Tiri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "T\u00eer\u00ee", "iso_1_code": null, "iso_3_code": "cir", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1922", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00eesh\u00eb\u00eb", "iso_1_code": null, "iso_3_code": "sih", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1923", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1921", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1911", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1907", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1881", + "scripts": [], + "own_tokenizer": false }, { "name": "North and Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "N\u2019kep", "iso_1_code": null, "iso_3_code": "sku", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1927", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1926", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Butmas-Tur", "iso_1_code": null, "iso_3_code": "bnr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1929", + "scripts": [], + "own_tokenizer": false }, { "name": "Lorediakarkar", "iso_1_code": null, "iso_3_code": "lnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1930", + "scripts": [], + "own_tokenizer": false }, { "name": "Atin", "iso_1_code": null, "iso_3_code": "plb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1931", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngen", "iso_1_code": null, "iso_3_code": "ssv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1932", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1928", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1925", + "scripts": [], + "own_tokenizer": false }, { "name": "Malekula Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Labo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ninde", "iso_1_code": null, "iso_3_code": "mwi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1935", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1934", + "scripts": [], + "own_tokenizer": false }, { "name": "Malekula Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Neverver", "iso_1_code": null, "iso_3_code": "lgk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1937", + "scripts": [], + "own_tokenizer": false }, { "name": "Larevat", "iso_1_code": null, "iso_3_code": "lrv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1938", + "scripts": [], + "own_tokenizer": false }, { "name": "Litzlitz", "iso_1_code": null, "iso_3_code": "lzl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1939", + "scripts": [], + "own_tokenizer": false }, { "name": "Maragus", "iso_1_code": null, "iso_3_code": "mrs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1940", + "scripts": [], + "own_tokenizer": false }, { "name": "V\u2019\u00ebnen Taut", "iso_1_code": null, "iso_3_code": "nmb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1941", + "scripts": [], + "own_tokenizer": false }, { "name": "Nasarian", "iso_1_code": null, "iso_3_code": "nvh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1942", + "scripts": [], + "own_tokenizer": false }, { "name": "Avava", "iso_1_code": null, "iso_3_code": "tmb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1943", + "scripts": [], + "own_tokenizer": false }, { "name": "Neve\u2019ei", "iso_1_code": null, "iso_3_code": "vnm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1944", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1936", + "scripts": [], + "own_tokenizer": false }, { "name": "Small Nambas", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dixon Reef", "iso_1_code": null, "iso_3_code": "dix", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1946", + "scripts": [], + "own_tokenizer": false }, { "name": "Letemboi", "iso_1_code": null, "iso_3_code": "nms", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1947", + "scripts": [], + "own_tokenizer": false }, { "name": "Repanbitip", "iso_1_code": null, "iso_3_code": "rpn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1945", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1933", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeast Vanuatu-Banks Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Efate, South", "iso_1_code": null, "iso_3_code": "erk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1951", + "scripts": [], + "own_tokenizer": false }, { "name": "Eton", "iso_1_code": null, "iso_3_code": "etn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1952", + "scripts": [], + "own_tokenizer": false }, { "name": "Efate, North", "iso_1_code": null, "iso_3_code": "llp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1953", + "scripts": [], + "own_tokenizer": false }, { "name": "Lelepa", "iso_1_code": null, "iso_3_code": "lpa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1954", + "scripts": [], + "own_tokenizer": false }, { "name": "Namakura", "iso_1_code": null, "iso_3_code": "nmk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1955", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1950", + "scripts": [], + "own_tokenizer": false }, { "name": "East Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Apma", "iso_1_code": null, "iso_3_code": "app", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1957", + "scripts": [], + "own_tokenizer": false }, { "name": "Daakaka", "iso_1_code": null, "iso_3_code": "bpa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1958", + "scripts": [], + "own_tokenizer": false }, { "name": "Baetora", "iso_1_code": null, "iso_3_code": "btr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1959", + "scripts": [], + "own_tokenizer": false }, { "name": "Lonwolwol", "iso_1_code": null, "iso_3_code": "crc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1960", + "scripts": [], + "own_tokenizer": false }, { "name": "Fanbak", "iso_1_code": null, "iso_3_code": "fnb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1961", + "scripts": [], + "own_tokenizer": false }, { "name": "Hiw", "iso_1_code": null, "iso_3_code": "hiw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1962", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "krf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1963", + "scripts": [], + "own_tokenizer": false }, { "name": "Lo-Toga", "iso_1_code": null, "iso_3_code": "lht", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1964", + "scripts": [], + "own_tokenizer": false }, { "name": "Lakon", "iso_1_code": null, "iso_3_code": "lkn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1965", + "scripts": [], + "own_tokenizer": false }, { "name": "Hano", "iso_1_code": null, "iso_3_code": "lml", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1966", + "scripts": [], + "own_tokenizer": false }, { "name": "Lemerig", "iso_1_code": null, "iso_3_code": "lrz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1967", + "scripts": [], + "own_tokenizer": false }, { "name": "Mwotlap", "iso_1_code": null, "iso_3_code": "mlv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1968", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambrym, North", "iso_1_code": null, "iso_3_code": "mmg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1969", + "scripts": [], + "own_tokenizer": false }, { "name": "Marino", "iso_1_code": null, "iso_3_code": "mrb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1970", + "scripts": [], + "own_tokenizer": false }, { "name": "Mwerlap", "iso_1_code": null, "iso_3_code": "mrm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1971", + "scripts": [], + "own_tokenizer": false }, { "name": "Vur\u00ebs", "iso_1_code": null, "iso_3_code": "msn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1972", + "scripts": [], + "own_tokenizer": false }, { "name": "Mota", "iso_1_code": null, "iso_3_code": "mtt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1973", + "scripts": [], + "own_tokenizer": false }, { "name": "Maewo, Central", "iso_1_code": null, "iso_3_code": "mwo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1974", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambae, West", "iso_1_code": null, "iso_3_code": "nnd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1975", + "scripts": [], + "own_tokenizer": false }, { "name": "Olrat", "iso_1_code": null, "iso_3_code": "olr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1976", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambae, East", "iso_1_code": null, "iso_3_code": "omb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1977", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Paama", "iso_1_code": null, "iso_3_code": "pma", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1978", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Port Vato", "iso_1_code": null, "iso_3_code": "ptv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1979", + "scripts": [], + "own_tokenizer": false }, { "name": "Sa", "iso_1_code": null, "iso_3_code": "sax", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1980", + "scripts": [], + "own_tokenizer": false }, { "name": "Ske", "iso_1_code": null, "iso_3_code": "ske", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1981", + "scripts": [], + "own_tokenizer": false }, { "name": "Sowa", "iso_1_code": null, "iso_3_code": "sww", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1982", + "scripts": [], + "own_tokenizer": false }, { "name": "Nume", "iso_1_code": null, "iso_3_code": "tgs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1983", + "scripts": [], + "own_tokenizer": false }, { "name": "Lehali", "iso_1_code": null, "iso_3_code": "tql", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1984", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambrym, Southeast", "iso_1_code": null, "iso_3_code": "tvk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1985", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "L\u00f6y\u00f6p", "iso_1_code": null, "iso_3_code": "urr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1986", + "scripts": [], + "own_tokenizer": false }, { "name": "Vera\u2019a", "iso_1_code": null, "iso_3_code": "vra", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1987", + "scripts": [], + "own_tokenizer": false }, { "name": "Dorig", "iso_1_code": null, "iso_3_code": "wwo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1988", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1956", + "scripts": [], + "own_tokenizer": false }, { "name": "Epi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bieria-Maii", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bieria", "iso_1_code": null, "iso_3_code": "brj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1991", + "scripts": [], + "own_tokenizer": false }, { "name": "Maii", "iso_1_code": null, "iso_3_code": "mmm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1990", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamenu-Baki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baki-Bierebo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baki", "iso_1_code": null, "iso_3_code": "bki", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1995", + "scripts": [], + "own_tokenizer": false }, { "name": "Bierebo", "iso_1_code": null, "iso_3_code": "bnk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1996", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1994", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamenu-Lewo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lamenu", "iso_1_code": null, "iso_3_code": "lmu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "1998", + "scripts": [], + "own_tokenizer": false }, { "name": "Lewo", "iso_1_code": null, "iso_3_code": "lww", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "1999", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "1997", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1993", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1989", + "scripts": [], + "own_tokenizer": false }, { "name": "Malekula Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Axamb", "iso_1_code": null, "iso_3_code": "ahb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2001", + "scripts": [], + "own_tokenizer": false }, { "name": "Aulua", "iso_1_code": null, "iso_3_code": "aul", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2002", + "scripts": [], + "own_tokenizer": false }, { "name": "Maskelynes", "iso_1_code": null, "iso_3_code": "klv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2003", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malua Bay", "iso_1_code": null, "iso_3_code": "mll", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2004", + "scripts": [], + "own_tokenizer": false }, { "name": "Na\u2019ahai", "iso_1_code": null, "iso_3_code": "mlx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2005", + "scripts": [], + "own_tokenizer": false }, { "name": "Mae", "iso_1_code": null, "iso_3_code": "mme", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2006", + "scripts": [], + "own_tokenizer": false }, { "name": "Mpotovoro", "iso_1_code": null, "iso_3_code": "mvt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2007", + "scripts": [], + "own_tokenizer": false }, { "name": "Unua", "iso_1_code": null, "iso_3_code": "onu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2008", + "scripts": [], + "own_tokenizer": false }, { "name": "Rerep", "iso_1_code": null, "iso_3_code": "pgk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2009", + "scripts": [], + "own_tokenizer": false }, { "name": "Port Sandwich", "iso_1_code": null, "iso_3_code": "psw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2010", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahavaq", "iso_1_code": null, "iso_3_code": "sns", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2011", + "scripts": [], + "own_tokenizer": false }, { "name": "Uripiv-Wala-Rano-Atchin", "iso_1_code": null, "iso_3_code": "upv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2012", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vao", "iso_1_code": null, "iso_3_code": "vao", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2013", + "scripts": [], + "own_tokenizer": false }, { "name": "Banam Bay", "iso_1_code": null, "iso_3_code": "vrt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2014", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2000", + "scripts": [], + "own_tokenizer": false }, { "name": "West Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Soro-n Raki", "iso_1_code": null, "iso_3_code": "akr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2016", + "scripts": [], + "own_tokenizer": false }, { "name": "Amblong", "iso_1_code": null, "iso_3_code": "alm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2017", + "scripts": [], + "own_tokenizer": false }, { "name": "Aore", "iso_1_code": null, "iso_3_code": "aor", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2018", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiae", "iso_1_code": null, "iso_3_code": "frt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2019", + "scripts": [], + "own_tokenizer": false }, { "name": "Merei", "iso_1_code": null, "iso_3_code": "lmb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2020", + "scripts": [], + "own_tokenizer": false }, { "name": "Mafea", "iso_1_code": null, "iso_3_code": "mkv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2021", + "scripts": [], + "own_tokenizer": false }, { "name": "Malo", "iso_1_code": null, "iso_3_code": "mla", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2022", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiale", "iso_1_code": null, "iso_3_code": "mnl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2023", + "scripts": [], + "own_tokenizer": false }, { "name": "Morouas", "iso_1_code": null, "iso_3_code": "mrp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2024", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanokuku", "iso_1_code": null, "iso_3_code": "nkk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2025", + "scripts": [], + "own_tokenizer": false }, { "name": "Varsaf", "iso_1_code": null, "iso_3_code": "nrg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2026", + "scripts": [], + "own_tokenizer": false }, { "name": "Balen", "iso_1_code": null, "iso_3_code": "nsw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2027", + "scripts": [], + "own_tokenizer": false }, { "name": "Tapiafaru", "iso_1_code": null, "iso_3_code": "ptr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2028", + "scripts": [], + "own_tokenizer": false }, { "name": "Mores", "iso_1_code": null, "iso_3_code": "rga", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2029", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangoa", "iso_1_code": null, "iso_3_code": "tgp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2030", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tolomako", "iso_1_code": null, "iso_3_code": "tlm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2031", + "scripts": [], + "own_tokenizer": false }, { "name": "Tambotalo", "iso_1_code": null, "iso_3_code": "tls", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2032", + "scripts": [], + "own_tokenizer": false }, { "name": "Vinekula", "iso_1_code": null, "iso_3_code": "tmi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2033", + "scripts": [], + "own_tokenizer": false }, { "name": "Oa", "iso_1_code": null, "iso_3_code": "tmt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2034", + "scripts": [], + "own_tokenizer": false }, { "name": "Akei", "iso_1_code": null, "iso_3_code": "tsr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2035", + "scripts": [], + "own_tokenizer": false }, { "name": "Tavanlav", "iso_1_code": null, "iso_3_code": "vlp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2036", + "scripts": [], + "own_tokenizer": false }, { "name": "Tapesena", "iso_1_code": null, "iso_3_code": "vnp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2037", + "scripts": [], + "own_tokenizer": false }, { "name": "Moiso", "iso_1_code": null, "iso_3_code": "wlr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2038", + "scripts": [], + "own_tokenizer": false }, { "name": "Jo", "iso_1_code": null, "iso_3_code": "wsi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2039", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2015", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1949", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1924", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1780", + "scripts": [], + "own_tokenizer": false }, { "name": "South Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": "aty", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2042", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2041", + "scripts": [], + "own_tokenizer": false }, { "name": "Erromanga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sie", "iso_1_code": null, "iso_3_code": "erg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2044", + "scripts": [], + "own_tokenizer": false }, { "name": "Ifo", "iso_1_code": null, "iso_3_code": "iff", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2045", + "scripts": [], + "own_tokenizer": false }, { "name": "Ura", "iso_1_code": null, "iso_3_code": "uur", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2046", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2043", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tanna, Southwest", "iso_1_code": null, "iso_3_code": "nwi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2048", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwamera", "iso_1_code": null, "iso_3_code": "tnk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2049", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lenakel", "iso_1_code": null, "iso_3_code": "tnl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2050", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanna, North", "iso_1_code": null, "iso_3_code": "tnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2051", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Whitesands", "iso_1_code": null, "iso_3_code": "tnp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2052", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2047", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2040", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gela-Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": "bgt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2056", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2055", + "scripts": [], + "own_tokenizer": false }, { "name": "Gela", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lengo", "iso_1_code": null, "iso_3_code": "lgr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2058", + "scripts": [], + "own_tokenizer": false }, { "name": "Gela", "iso_1_code": null, "iso_3_code": "nlg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2059", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2057", + "scripts": [], + "own_tokenizer": false }, { "name": "Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Birao", "iso_1_code": null, "iso_3_code": "brr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2061", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghari", "iso_1_code": null, "iso_3_code": "gri", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2062", + "scripts": [], + "own_tokenizer": false }, { "name": "Malango", "iso_1_code": null, "iso_3_code": "mln", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2063", + "scripts": [], + "own_tokenizer": false }, { "name": "Talise", "iso_1_code": null, "iso_3_code": "tlr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2064", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2060", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2054", + "scripts": [], + "own_tokenizer": false }, { "name": "Malaita-San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Malaita", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": "lgu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2068", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2067", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baelelea", "iso_1_code": null, "iso_3_code": "bvc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2070", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Baeggu", "iso_1_code": null, "iso_3_code": "bvd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2071", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fataleka", "iso_1_code": null, "iso_3_code": "far", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2072", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gula\u2019alaa", "iso_1_code": null, "iso_3_code": "gmb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2073", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwaio", "iso_1_code": null, "iso_3_code": "kwd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2074", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwara\u2019ae", "iso_1_code": null, "iso_3_code": "kwf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2075", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wala", "iso_1_code": null, "iso_3_code": "lgl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2076", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lau", "iso_1_code": null, "iso_3_code": "llu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2077", + "scripts": [], + "own_tokenizer": false }, { "name": "To\u2019abaita", "iso_1_code": null, "iso_3_code": "mlu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2078", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2069", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "\u2019Are\u2019are", "iso_1_code": null, "iso_3_code": "alu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2080", + "scripts": [], + "own_tokenizer": false }, { "name": "Sa\ua78ca", "iso_1_code": null, "iso_3_code": "apb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2081", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dori\u2019o", "iso_1_code": null, "iso_3_code": "dor", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2082", + "scripts": [], + "own_tokenizer": false }, { "name": "Oroha", "iso_1_code": null, "iso_3_code": "ora", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2083", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2066", + "scripts": [], + "own_tokenizer": false }, { "name": "San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kahua", "iso_1_code": null, "iso_3_code": "agw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2085", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Arosi", "iso_1_code": null, "iso_3_code": "aia", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2086", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bauro", "iso_1_code": null, "iso_3_code": "bxa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2087", + "scripts": [], + "own_tokenizer": false }, { "name": "Fagani", "iso_1_code": null, "iso_3_code": "faf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2088", + "scripts": [], + "own_tokenizer": false }, { "name": "Owa", "iso_1_code": null, "iso_3_code": "stn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2089", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2084", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2065", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2053", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1779", + "scripts": [], + "own_tokenizer": false }, { "name": "St. Matthias", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mussau-Emira", "iso_1_code": null, "iso_3_code": "emi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2091", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tenis", "iso_1_code": null, "iso_3_code": "tns", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2092", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2090", + "scripts": [], + "own_tokenizer": false }, { "name": "Temotu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Reefs-Santa Cruz", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "\u00c4iwoo", "iso_1_code": null, "iso_3_code": "nfl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2095", + "scripts": [], + "own_tokenizer": false }, { "name": "Engdewu", "iso_1_code": null, "iso_3_code": "ngr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2096", + "scripts": [], + "own_tokenizer": false }, { "name": "Nal\u00f6go", "iso_1_code": null, "iso_3_code": "nlz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2097", + "scripts": [], + "own_tokenizer": false }, { "name": "Noip\u00e4", "iso_1_code": null, "iso_3_code": "npx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2098", + "scripts": [], + "own_tokenizer": false }, { "name": "Nat\u00fcgu", "iso_1_code": null, "iso_3_code": "ntu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2099", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2094", + "scripts": [], + "own_tokenizer": false }, { "name": "Utupua-Vanikoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Teanu", "iso_1_code": null, "iso_3_code": "tkw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2101", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanema", "iso_1_code": null, "iso_3_code": "tnx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2102", + "scripts": [], + "own_tokenizer": false }, { "name": "Lovono", "iso_1_code": null, "iso_3_code": "vnk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2103", + "scripts": [], + "own_tokenizer": false }, { "name": "Utupua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Asumboa", "iso_1_code": null, "iso_3_code": "aua", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2105", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanibili", "iso_1_code": null, "iso_3_code": "tbe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2106", + "scripts": [], + "own_tokenizer": false }, { "name": "Amba", "iso_1_code": null, "iso_3_code": "utp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2107", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2104", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2100", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2093", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Meso Melanesian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bali-Vitu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Uneapa", "iso_1_code": null, "iso_3_code": "bbn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2111", + "scripts": [], + "own_tokenizer": false }, { "name": "Vitu", "iso_1_code": null, "iso_3_code": "wiv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2112", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2110", + "scripts": [], + "own_tokenizer": false }, { "name": "New Ireland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lavongai-Nalik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tungag", "iso_1_code": null, "iso_3_code": "lcm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2115", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kara", "iso_1_code": null, "iso_3_code": "leu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2116", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lakurumau", "iso_1_code": null, "iso_3_code": "lxm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2117", + "scripts": [], + "own_tokenizer": false }, { "name": "Nalik", "iso_1_code": null, "iso_3_code": "nal", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2118", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandara", "iso_1_code": null, "iso_3_code": "tbf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2119", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiang", "iso_1_code": null, "iso_3_code": "tbj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2120", + "scripts": [], + "own_tokenizer": false }, { "name": "Tigak", "iso_1_code": null, "iso_3_code": "tgc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2121", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2114", + "scripts": [], + "own_tokenizer": false }, { "name": "Madak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Barok", "iso_1_code": null, "iso_3_code": "bjk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2123", + "scripts": [], + "own_tokenizer": false }, { "name": "Lavatbura-Lamusong", "iso_1_code": null, "iso_3_code": "lbv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2124", + "scripts": [], + "own_tokenizer": false }, { "name": "Madak", "iso_1_code": null, "iso_3_code": "mmx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2125", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2122", + "scripts": [], + "own_tokenizer": false }, { "name": "South New Ireland-Northwest Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Minigir", "iso_1_code": null, "iso_3_code": "bxf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2127", + "scripts": [], + "own_tokenizer": false }, { "name": "Choiseul", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Babatana", "iso_1_code": null, "iso_3_code": "baa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2129", + "scripts": [], + "own_tokenizer": false }, { "name": "Ririo", "iso_1_code": null, "iso_3_code": "rri", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2130", + "scripts": [], + "own_tokenizer": false }, { "name": "Vaghua", "iso_1_code": null, "iso_3_code": "tva", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2131", + "scripts": [], + "own_tokenizer": false }, { "name": "Varisi", "iso_1_code": null, "iso_3_code": "vrs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2132", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2128", + "scripts": [], + "own_tokenizer": false }, { "name": "Mono-Uruava", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mono", "iso_1_code": null, "iso_3_code": "mte", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2134", + "scripts": [], + "own_tokenizer": false }, { "name": "Torau", "iso_1_code": null, "iso_3_code": "ttu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2135", + "scripts": [], + "own_tokenizer": false }, { "name": "Uruava", "iso_1_code": null, "iso_3_code": "urv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2136", + "scripts": [], + "own_tokenizer": false }, { "name": "Vinitiri", "iso_1_code": null, "iso_3_code": "vmg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2137", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2133", + "scripts": [], + "own_tokenizer": false }, { "name": "Nehan-North Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Petats", "iso_1_code": null, "iso_3_code": "pex", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2140", + "scripts": [], + "own_tokenizer": false }, { "name": "Halia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hak\u00f6", "iso_1_code": null, "iso_3_code": "hao", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2142", + "scripts": [], + "own_tokenizer": false }, { "name": "Halia", "iso_1_code": null, "iso_3_code": "hla", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2143", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2141", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2139", + "scripts": [], + "own_tokenizer": false }, { "name": "Nehan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nehan", "iso_1_code": null, "iso_3_code": "nsn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2145", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2144", + "scripts": [], + "own_tokenizer": false }, { "name": "Papapana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Papapana", "iso_1_code": null, "iso_3_code": "ppn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2147", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2146", + "scripts": [], + "own_tokenizer": false }, { "name": "Saposa-Tinputz", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hahon", "iso_1_code": null, "iso_3_code": "hah", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2149", + "scripts": [], + "own_tokenizer": false }, { "name": "Saposa", "iso_1_code": null, "iso_3_code": "sps", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2150", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Teop", "iso_1_code": null, "iso_3_code": "tio", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2151", + "scripts": [], + "own_tokenizer": false }, { "name": "Tinputz", "iso_1_code": null, "iso_3_code": "tpz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2152", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2148", + "scripts": [], + "own_tokenizer": false }, { "name": "Solos", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Solos", "iso_1_code": null, "iso_3_code": "sol", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2154", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2153", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2138", + "scripts": [], + "own_tokenizer": false }, { "name": "New Georgia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Vangunu", "iso_1_code": null, "iso_3_code": "mpr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2157", + "scripts": [], + "own_tokenizer": false }, { "name": "Marovo", "iso_1_code": null, "iso_3_code": "mvo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2158", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2156", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ghanongga", "iso_1_code": null, "iso_3_code": "ghn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2160", + "scripts": [], + "own_tokenizer": false }, { "name": "Hoava", "iso_1_code": null, "iso_3_code": "hoa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2161", + "scripts": [], + "own_tokenizer": false }, { "name": "Kusaghe", "iso_1_code": null, "iso_3_code": "ksg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2162", + "scripts": [], + "own_tokenizer": false }, { "name": "Kazukuru", "iso_1_code": null, "iso_3_code": "kzk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2163", + "scripts": [], + "own_tokenizer": false }, { "name": "Lungga", "iso_1_code": null, "iso_3_code": "lga", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2164", + "scripts": [], + "own_tokenizer": false }, { "name": "Dughore", "iso_1_code": null, "iso_3_code": "nke", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2165", + "scripts": [], + "own_tokenizer": false }, { "name": "Roviana", "iso_1_code": null, "iso_3_code": "rug", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2166", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Simbo", "iso_1_code": null, "iso_3_code": "sbb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2167", + "scripts": [], + "own_tokenizer": false }, { "name": "Ughele", "iso_1_code": null, "iso_3_code": "uge", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2168", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2159", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2155", + "scripts": [], + "own_tokenizer": false }, { "name": "Patpatar-Tolai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Fanamaket", "iso_1_code": null, "iso_3_code": "bjp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2170", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Patpatar", "iso_1_code": null, "iso_3_code": "gfk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2171", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guramalum", "iso_1_code": null, "iso_3_code": "grz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2172", + "scripts": [], + "own_tokenizer": false }, { "name": "Niwer Mil", "iso_1_code": null, "iso_3_code": "hrc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2173", + "scripts": [], + "own_tokenizer": false }, { "name": "Warwar Feni", "iso_1_code": null, "iso_3_code": "hrw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2174", + "scripts": [], + "own_tokenizer": false }, { "name": "Konomala", "iso_1_code": null, "iso_3_code": "koa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2175", + "scripts": [], + "own_tokenizer": false }, { "name": "Kandas", "iso_1_code": null, "iso_3_code": "kqw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2176", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kuanua", "iso_1_code": null, "iso_3_code": "ksd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2177", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Label", "iso_1_code": null, "iso_3_code": "lbb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2178", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ramoaaina", "iso_1_code": null, "iso_3_code": "rai", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2179", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sursurunga", "iso_1_code": null, "iso_3_code": "sgz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2180", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Siar-Lak", "iso_1_code": null, "iso_3_code": "sjr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2181", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2169", + "scripts": [], + "own_tokenizer": false }, { "name": "Piva-Banoni", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bannoni", "iso_1_code": null, "iso_3_code": "bcm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2183", + "scripts": [], + "own_tokenizer": false }, { "name": "Lawunuia", "iso_1_code": null, "iso_3_code": "tgi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2184", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2182", + "scripts": [], + "own_tokenizer": false }, { "name": "Santa Isabel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Blablanga", "iso_1_code": null, "iso_3_code": "blp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2187", + "scripts": [], + "own_tokenizer": false }, { "name": "Zazao", "iso_1_code": null, "iso_3_code": "jaj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2188", + "scripts": [], + "own_tokenizer": false }, { "name": "Kokota", "iso_1_code": null, "iso_3_code": "kkk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2189", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2186", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gao", "iso_1_code": null, "iso_3_code": "gga", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2191", + "scripts": [], + "own_tokenizer": false }, { "name": "Cheke Holo", "iso_1_code": null, "iso_3_code": "mrn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2192", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2190", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Zabana", "iso_1_code": null, "iso_3_code": "kji", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2194", + "scripts": [], + "own_tokenizer": false }, { "name": "Laghu", "iso_1_code": null, "iso_3_code": "lgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2195", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2193", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2185", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2126", + "scripts": [], + "own_tokenizer": false }, { "name": "Tabar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lihir", "iso_1_code": null, "iso_3_code": "lih", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2197", + "scripts": [], + "own_tokenizer": false }, { "name": "Notsi", "iso_1_code": null, "iso_3_code": "ncf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2196", + "scripts": [], + "own_tokenizer": false }, { "name": "Tomoip", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tomoip", "iso_1_code": null, "iso_3_code": "tqp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2200", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2199", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2113", + "scripts": [], + "own_tokenizer": false }, { "name": "Willaumez", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bulu", "iso_1_code": null, "iso_3_code": "bjl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2202", + "scripts": [], + "own_tokenizer": false }, { "name": "Bola", "iso_1_code": null, "iso_3_code": "bnp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2203", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Meramera", "iso_1_code": null, "iso_3_code": "mxm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2204", + "scripts": [], + "own_tokenizer": false }, { "name": "Nakanai", "iso_1_code": null, "iso_3_code": "nak", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2205", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2109", + "scripts": [], + "own_tokenizer": false }, { "name": "North New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Huon Gulf", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Markham", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lower", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Busu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Duwet", "iso_1_code": null, "iso_3_code": "gve", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2211", + "scripts": [], + "own_tokenizer": false }, { "name": "Aribwatsa", "iso_1_code": null, "iso_3_code": "laz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2212", + "scripts": [], + "own_tokenizer": false }, { "name": "Musom", "iso_1_code": null, "iso_3_code": "msu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2213", + "scripts": [], + "own_tokenizer": false }, { "name": "Nafi", "iso_1_code": null, "iso_3_code": "srf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2214", + "scripts": [], + "own_tokenizer": false }, { "name": "Aribwaung", "iso_1_code": null, "iso_3_code": "ylu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2215", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2210", + "scripts": [], + "own_tokenizer": false }, { "name": "Labu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Labu", "iso_1_code": null, "iso_3_code": "lbu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2217", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2216", + "scripts": [], + "own_tokenizer": false }, { "name": "Wampar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Wampar", "iso_1_code": null, "iso_3_code": "lbq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2219", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2218", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2209", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Adzera", "iso_1_code": null, "iso_3_code": "adz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2221", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mountain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mari", "iso_1_code": null, "iso_3_code": "hob", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2223", + "scripts": [], + "own_tokenizer": false }, { "name": "Wampur", "iso_1_code": null, "iso_3_code": "waz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2224", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarasira", "iso_1_code": null, "iso_3_code": "zsa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2225", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukurum", "iso_1_code": null, "iso_3_code": "zsu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2226", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2222", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2220", + "scripts": [], + "own_tokenizer": false }, { "name": "Watut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kodut, South", "iso_1_code": null, "iso_3_code": "mcy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2228", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodut, Middle", "iso_1_code": null, "iso_3_code": "mpl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2229", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodut, North", "iso_1_code": null, "iso_3_code": "una", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2227", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2208", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bugawac", "iso_1_code": null, "iso_3_code": "buk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2232", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yabem", "iso_1_code": null, "iso_3_code": "jae", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2233", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kala", "iso_1_code": null, "iso_3_code": "kcl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2234", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2231", + "scripts": [], + "own_tokenizer": false }, { "name": "Numbami", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Numbami", "iso_1_code": null, "iso_3_code": "sij", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2235", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } + }, "children": [ { "name": "Hote-Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buang, Central", "iso_1_code": null, "iso_3_code": "bzh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2240", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Buang, Mangga", "iso_1_code": null, "iso_3_code": "mmo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2241", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Piu", "iso_1_code": null, "iso_3_code": "pix", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2242", + "scripts": [], + "own_tokenizer": false }, { "name": "Kapin", "iso_1_code": null, "iso_3_code": "tbx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2243", + "scripts": [], + "own_tokenizer": false }, { "name": "Vehes", "iso_1_code": null, "iso_3_code": "val", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2244", + "scripts": [], + "own_tokenizer": false }, { "name": "Mumeng", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dambi", "iso_1_code": null, "iso_3_code": "dac", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2246", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorakor", "iso_1_code": null, "iso_3_code": "goc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2247", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumalu", "iso_1_code": null, "iso_3_code": "ksl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2248", + "scripts": [], + "own_tokenizer": false }, { "name": "Patep", "iso_1_code": null, "iso_3_code": "ptp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2249", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zenag", "iso_1_code": null, "iso_3_code": "zeg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2250", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2245", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2239", + "scripts": [], + "own_tokenizer": false }, { "name": "Hote", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Malei", "iso_1_code": null, "iso_3_code": "hot", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2252", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yamap", "iso_1_code": null, "iso_3_code": "ymp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2253", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2238", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaiwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Iwal", "iso_1_code": null, "iso_3_code": "kbm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2255", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2207", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngero-Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ngero", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": "bch", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2259", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lusi", "iso_1_code": null, "iso_3_code": "khl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2260", + "scripts": [], + "own_tokenizer": false }, { "name": "Kove", "iso_1_code": null, "iso_3_code": "kvc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2261", + "scripts": [], + "own_tokenizer": false }, { "name": "Mala", "iso_1_code": null, "iso_3_code": "mmt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2262", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2258", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gitua", "iso_1_code": null, "iso_3_code": "ggt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2264", + "scripts": [], + "own_tokenizer": false }, { "name": "Mutu", "iso_1_code": null, "iso_3_code": "tuc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2265", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2263", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2257", + "scripts": [], + "own_tokenizer": false }, { "name": "Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Astrolabe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Awad Bing", "iso_1_code": null, "iso_3_code": "bcu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2269", + "scripts": [], + "own_tokenizer": false }, { "name": "Mindiri", "iso_1_code": null, "iso_3_code": "mpn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2270", + "scripts": [], + "own_tokenizer": false }, { "name": "Yote", "iso_1_code": null, "iso_3_code": "wab", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2271", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2268", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bilbil", "iso_1_code": null, "iso_3_code": "brz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2274", + "scripts": [], + "own_tokenizer": false }, { "name": "Gedaged", "iso_1_code": null, "iso_3_code": "gdd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2275", + "scripts": [], + "own_tokenizer": false }, { "name": "Matukar Panau", "iso_1_code": null, "iso_3_code": "mjk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2276", + "scripts": [], + "own_tokenizer": false }, { "name": "Takia", "iso_1_code": null, "iso_3_code": "tbc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2277", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2273", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Marik", "iso_1_code": null, "iso_3_code": "dad", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2279", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2278", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2272", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2267", + "scripts": [], + "own_tokenizer": false }, { "name": "Kilenge-Maleu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Maleu-Kilenge", "iso_1_code": null, "iso_3_code": "mgl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2280", + "scripts": [], + "own_tokenizer": false }, { "name": "Korap", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Arop-Lokep", "iso_1_code": null, "iso_3_code": "apr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2283", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Karnai", "iso_1_code": null, "iso_3_code": "bbv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2284", + "scripts": [], + "own_tokenizer": false }, { "name": "Pano", "iso_1_code": null, "iso_3_code": "mqz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2285", + "scripts": [], + "own_tokenizer": false }, { "name": "Mur Pano", "iso_1_code": null, "iso_3_code": "tkv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2286", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2282", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangap-Mbula", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mbula", "iso_1_code": null, "iso_3_code": "mna", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2288", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2287", + "scripts": [], + "own_tokenizer": false }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mamusi", "iso_1_code": null, "iso_3_code": "kdf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2290", + "scripts": [], + "own_tokenizer": false }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": "mee", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2291", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lote", "iso_1_code": null, "iso_3_code": "uvl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2292", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2289", + "scripts": [], + "own_tokenizer": false }, { "name": "Roinji-Nenaya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mato", "iso_1_code": null, "iso_3_code": "met", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2294", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "roe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2293", + "scripts": [], + "own_tokenizer": false }, { "name": "Sio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sio", "iso_1_code": null, "iso_3_code": "xsi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2297", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2296", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": "aie", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2300", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2299", + "scripts": [], + "own_tokenizer": false }, { "name": "Arawe-Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mangseng", "iso_1_code": null, "iso_3_code": "mbh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2303", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "East Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Akolet", "iso_1_code": null, "iso_3_code": "akt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2305", + "scripts": [], + "own_tokenizer": false }, { "name": "Avau", "iso_1_code": null, "iso_3_code": "avb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2306", + "scripts": [], + "own_tokenizer": false }, { "name": "Bebeli", "iso_1_code": null, "iso_3_code": "bek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2307", + "scripts": [], + "own_tokenizer": false }, { "name": "Amio-Gelimi", "iso_1_code": null, "iso_3_code": "let", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2308", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2304", + "scripts": [], + "own_tokenizer": false }, { "name": "West Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Solong", "iso_1_code": null, "iso_3_code": "aaw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2310", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambul", "iso_1_code": null, "iso_3_code": "apo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2311", + "scripts": [], + "own_tokenizer": false }, { "name": "Gimi", "iso_1_code": null, "iso_3_code": "gip", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2312", + "scripts": [], + "own_tokenizer": false }, { "name": "Aiklep", "iso_1_code": null, "iso_3_code": "mwg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2313", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2309", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2302", + "scripts": [], + "own_tokenizer": false }, { "name": "Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aighon", "iso_1_code": null, "iso_3_code": "aix", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2315", + "scripts": [], + "own_tokenizer": false }, { "name": "Miu", "iso_1_code": null, "iso_3_code": "mpo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2316", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaulong", "iso_1_code": null, "iso_3_code": "pss", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2317", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sengseng", "iso_1_code": null, "iso_3_code": "ssz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2318", + "scripts": [], + "own_tokenizer": false }, { "name": "Karore", "iso_1_code": null, "iso_3_code": "xkx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2314", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2301", + "scripts": [], + "own_tokenizer": false }, { "name": "Bibling", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lamogai", "iso_1_code": null, "iso_3_code": "lmg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2321", + "scripts": [], + "own_tokenizer": false }, { "name": "Mouk-Aria", "iso_1_code": null, "iso_3_code": "mwh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2322", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2320", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2298", + "scripts": [], + "own_tokenizer": false }, { "name": "Tami", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tami", "iso_1_code": null, "iso_3_code": "tmy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2323", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2266", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2256", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarmi-Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayupulau", "iso_1_code": null, "iso_3_code": "kzu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2327", + "scripts": [], + "own_tokenizer": false }, { "name": "Ormu", "iso_1_code": null, "iso_3_code": "orz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2328", + "scripts": [], + "own_tokenizer": false }, { "name": "Tobati", "iso_1_code": null, "iso_3_code": "tti", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2329", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2326", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarmi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Anus", "iso_1_code": null, "iso_3_code": "auq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2331", + "scripts": [], + "own_tokenizer": false }, { "name": "Bonggo", "iso_1_code": null, "iso_3_code": "bpg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2332", + "scripts": [], + "own_tokenizer": false }, { "name": "Masimasi", "iso_1_code": null, "iso_3_code": "ism", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2333", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaptiau", "iso_1_code": null, "iso_3_code": "kbi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2334", + "scripts": [], + "own_tokenizer": false }, { "name": "Liki", "iso_1_code": null, "iso_3_code": "lio", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2335", + "scripts": [], + "own_tokenizer": false }, { "name": "Fedan", "iso_1_code": null, "iso_3_code": "pdn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2336", + "scripts": [], + "own_tokenizer": false }, { "name": "Sobei", "iso_1_code": null, "iso_3_code": "sob", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2337", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarpia", "iso_1_code": null, "iso_3_code": "tpf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2338", + "scripts": [], + "own_tokenizer": false }, { "name": "Mo", "iso_1_code": null, "iso_3_code": "wkd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2339", + "scripts": [], + "own_tokenizer": false }, { "name": "Sunum", "iso_1_code": null, "iso_3_code": "ymn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2340", + "scripts": [], + "own_tokenizer": false }, { "name": "Yarsun", "iso_1_code": null, "iso_3_code": "yrs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2341", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2330", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2325", + "scripts": [], + "own_tokenizer": false }, { "name": "Schouten", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kairiru-Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kairiru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kaiep", "iso_1_code": null, "iso_3_code": "kbw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2345", + "scripts": [], + "own_tokenizer": false }, { "name": "Kairiru", "iso_1_code": null, "iso_3_code": "kxa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2346", + "scripts": [], + "own_tokenizer": false }, { "name": "Terebu", "iso_1_code": null, "iso_3_code": "trb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2347", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2344", + "scripts": [], + "own_tokenizer": false }, { "name": "Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Biem", "iso_1_code": null, "iso_3_code": "bmc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2349", + "scripts": [], + "own_tokenizer": false }, { "name": "Kis", "iso_1_code": null, "iso_3_code": "kis", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2350", + "scripts": [], + "own_tokenizer": false }, { "name": "Medebur", "iso_1_code": null, "iso_3_code": "mjm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2351", + "scripts": [], + "own_tokenizer": false }, { "name": "Manam", "iso_1_code": null, "iso_3_code": "mva", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2352", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2353", + "scripts": [], + "own_tokenizer": false }, { "name": "Wogeo", "iso_1_code": null, "iso_3_code": "woc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2348", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2343", + "scripts": [], + "own_tokenizer": false }, { "name": "Siau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Orop", "iso_1_code": null, "iso_3_code": "aps", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2356", + "scripts": [], + "own_tokenizer": false }, { "name": "Malol", "iso_1_code": null, "iso_3_code": "mbk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2357", + "scripts": [], + "own_tokenizer": false }, { "name": "Sera", "iso_1_code": null, "iso_3_code": "sry", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2358", + "scripts": [], + "own_tokenizer": false }, { "name": "Essono", "iso_1_code": null, "iso_3_code": "sso", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2359", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulau-Suain", "iso_1_code": null, "iso_3_code": "svb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2360", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumleo", "iso_1_code": null, "iso_3_code": "tmq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2361", + "scripts": [], + "own_tokenizer": false }, { "name": "Kap", "iso_1_code": null, "iso_3_code": "ykm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2362", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2355", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2342", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2206", + "scripts": [], + "own_tokenizer": false }, { "name": "Papuan Tip", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": "mbq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2366", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2365", + "scripts": [], + "own_tokenizer": false }, { "name": "North Papuan Mainland-D\u2019Entrecasteaux", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": "aui", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2369", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2368", + "scripts": [], + "own_tokenizer": false }, { "name": "Are-Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Are", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Miniafia Oyan", "iso_1_code": null, "iso_3_code": "aai", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2372", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ghayavi", "iso_1_code": null, "iso_3_code": "bmk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2373", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Doga", "iso_1_code": null, "iso_3_code": "dgg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2374", + "scripts": [], + "own_tokenizer": false }, { "name": "Are", "iso_1_code": null, "iso_3_code": "mwc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2375", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gapapaiwa", "iso_1_code": null, "iso_3_code": "pwg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2376", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ubir", "iso_1_code": null, "iso_3_code": "ubr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2377", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaninuwa", "iso_1_code": null, "iso_3_code": "wat", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2378", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2371", + "scripts": [], + "own_tokenizer": false }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gweda", "iso_1_code": null, "iso_3_code": "grw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2380", + "scripts": [], + "own_tokenizer": false }, { "name": "Haigwai", "iso_1_code": null, "iso_3_code": "hgw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2381", + "scripts": [], + "own_tokenizer": false }, { "name": "Maiwala", "iso_1_code": null, "iso_3_code": "mum", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2382", + "scripts": [], + "own_tokenizer": false }, { "name": "Minaveha", "iso_1_code": null, "iso_3_code": "mvn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2383", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tawala", "iso_1_code": null, "iso_3_code": "tbo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2384", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": "tpa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2385", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wa\u2019ema", "iso_1_code": null, "iso_3_code": "wag", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2386", + "scripts": [], + "own_tokenizer": false }, { "name": "Wedau", "iso_1_code": null, "iso_3_code": "wed", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2387", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yakaikeke", "iso_1_code": null, "iso_3_code": "ykk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2379", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2370", + "scripts": [], + "own_tokenizer": false }, { "name": "Bwaidoga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bwaidoka", "iso_1_code": null, "iso_3_code": "bwd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2390", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Goodenough, West", "iso_1_code": null, "iso_3_code": "ddi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2391", + "scripts": [], + "own_tokenizer": false }, { "name": "Koluwawa", "iso_1_code": null, "iso_3_code": "klx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2392", + "scripts": [], + "own_tokenizer": false }, { "name": "Molima", "iso_1_code": null, "iso_3_code": "mox", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2393", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maiadomu", "iso_1_code": null, "iso_3_code": "mzz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2394", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iduna", "iso_1_code": null, "iso_3_code": "viv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2395", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iamalele", "iso_1_code": null, "iso_3_code": "yml", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2396", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2389", + "scripts": [], + "own_tokenizer": false }, { "name": "Dobu-Duau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bunama", "iso_1_code": null, "iso_3_code": "bdd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2398", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Boselewa", "iso_1_code": null, "iso_3_code": "bwf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2399", + "scripts": [], + "own_tokenizer": false }, { "name": "Dobu", "iso_1_code": null, "iso_3_code": "dob", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2400", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Duau", "iso_1_code": null, "iso_3_code": "dva", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2401", + "scripts": [], + "own_tokenizer": false }, { "name": "Galeya", "iso_1_code": null, "iso_3_code": "gar", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2402", + "scripts": [], + "own_tokenizer": false }, { "name": "Mwatebu", "iso_1_code": null, "iso_3_code": "mwa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2403", + "scripts": [], + "own_tokenizer": false }, { "name": "Sewa Bay", "iso_1_code": null, "iso_3_code": "sew", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2404", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2397", + "scripts": [], + "own_tokenizer": false }, { "name": "Gumawana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gumawana", "iso_1_code": null, "iso_3_code": "gvs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2406", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2405", + "scripts": [], + "own_tokenizer": false }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dawawa", "iso_1_code": null, "iso_3_code": "dww", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2408", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": "kqf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2409", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2407", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2367", + "scripts": [], + "own_tokenizer": false }, { "name": "Suauic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buhutu", "iso_1_code": null, "iso_3_code": "bxh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2411", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "\u2019Auhelawa", "iso_1_code": null, "iso_3_code": "kud", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2412", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Oya\u2019oya", "iso_1_code": null, "iso_3_code": "oyy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2413", + "scripts": [], + "own_tokenizer": false }, { "name": "Saliba", "iso_1_code": null, "iso_3_code": "sbe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2414", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Suau", "iso_1_code": null, "iso_3_code": "swp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2415", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bwanabwana", "iso_1_code": null, "iso_3_code": "tte", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2416", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Unubahe", "iso_1_code": null, "iso_3_code": "unu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2417", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagawaga", "iso_1_code": null, "iso_3_code": "wgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2418", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaleba", "iso_1_code": null, "iso_3_code": "ylb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2410", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2364", + "scripts": [], + "own_tokenizer": false }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Oumic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ouma", "iso_1_code": null, "iso_3_code": "oum", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2423", + "scripts": [], + "own_tokenizer": false }, { "name": "Magoric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bina", "iso_1_code": null, "iso_3_code": "bmn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2425", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoba", "iso_1_code": null, "iso_3_code": "yob", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2426", + "scripts": [], + "own_tokenizer": false }, { "name": "Magori", "iso_1_code": null, "iso_3_code": "zgr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2427", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2424", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2422", + "scripts": [], + "own_tokenizer": false }, { "name": "Sinagoro-Keapara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Vula\u2019a", "iso_1_code": null, "iso_3_code": "hul", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2429", + "scripts": [], + "own_tokenizer": false }, { "name": "Keapara", "iso_1_code": null, "iso_3_code": "khz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2430", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Motu", "iso_1_code": null, "iso_3_code": "meu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2431", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sinaugoro", "iso_1_code": null, "iso_3_code": "snc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2432", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2428", + "scripts": [], + "own_tokenizer": false }, { "name": "West Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gabadi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Abadi", "iso_1_code": null, "iso_3_code": "kbt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2434", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Toura", "iso_1_code": null, "iso_3_code": "don", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2437", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuni", "iso_1_code": null, "iso_3_code": "kse", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2438", + "scripts": [], + "own_tokenizer": false }, { "name": "Mekeo", "iso_1_code": null, "iso_3_code": "mek", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2439", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lala", "iso_1_code": null, "iso_3_code": "nrz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2440", + "scripts": [], + "own_tokenizer": false }, { "name": "Waima", "iso_1_code": null, "iso_3_code": "rro", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2441", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2436", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2433", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2421", + "scripts": [], + "own_tokenizer": false }, { "name": "Kilivila-Louisiades", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kilivila", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Budibud", "iso_1_code": null, "iso_3_code": "btp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2444", + "scripts": [], + "own_tokenizer": false }, { "name": "Kilivila", "iso_1_code": null, "iso_3_code": "kij", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2445", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Muyuw", "iso_1_code": null, "iso_3_code": "myw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2446", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2443", + "scripts": [], + "own_tokenizer": false }, { "name": "Misima", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Misima-Panaeati", "iso_1_code": null, "iso_3_code": "mpx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2448", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2447", + "scripts": [], + "own_tokenizer": false }, { "name": "Nimoa-Sudest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rifao", "iso_1_code": null, "iso_3_code": "nmw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2450", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sudest", "iso_1_code": null, "iso_3_code": "tgo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2451", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2449", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2442", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2420", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2363", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2108", + "scripts": [], + "own_tokenizer": false }, { "name": "Yapese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yapese", "iso_1_code": null, "iso_3_code": "yap", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2453", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2452", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1739", + "scripts": [], + "own_tokenizer": false }, { "name": "South Halmahera-West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "South Halmahera", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Irarutu", "iso_1_code": null, "iso_3_code": "irh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2456", + "scripts": [], + "own_tokenizer": false }, { "name": "East Makian-Gane", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gane", "iso_1_code": null, "iso_3_code": "gzn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2458", + "scripts": [], + "own_tokenizer": false }, { "name": "Makian, East", "iso_1_code": null, "iso_3_code": "mky", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2459", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2457", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buli", "iso_1_code": null, "iso_3_code": "bzq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2461", + "scripts": [], + "own_tokenizer": false }, { "name": "Maba", "iso_1_code": null, "iso_3_code": "mqa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2462", + "scripts": [], + "own_tokenizer": false }, { "name": "Patani", "iso_1_code": null, "iso_3_code": "ptn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2463", + "scripts": [], + "own_tokenizer": false }, { "name": "Sawai", "iso_1_code": null, "iso_3_code": "szw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2464", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2460", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2455", + "scripts": [], + "own_tokenizer": false }, { "name": "West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bedoanas", "iso_1_code": null, "iso_3_code": "bed", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2467", + "scripts": [], + "own_tokenizer": false }, { "name": "Erokwanas", "iso_1_code": null, "iso_3_code": "erw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2468", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2466", + "scripts": [], + "own_tokenizer": false }, { "name": "Cenderawasih Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Biakic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Biak", "iso_1_code": null, "iso_3_code": "bhw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2471", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dusner", "iso_1_code": null, "iso_3_code": "dsn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2472", + "scripts": [], + "own_tokenizer": false }, { "name": "Meoswar", "iso_1_code": null, "iso_3_code": "mvx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2470", + "scripts": [], + "own_tokenizer": false }, { "name": "Iresim", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yeresiam", "iso_1_code": null, "iso_3_code": "ire", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2475", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2474", + "scripts": [], + "own_tokenizer": false }, { "name": "Mor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mor", "iso_1_code": null, "iso_3_code": "mhz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2477", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2476", + "scripts": [], + "own_tokenizer": false }, { "name": "Raja Ampat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "As", "iso_1_code": null, "iso_3_code": "asz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2479", + "scripts": [], + "own_tokenizer": false }, { "name": "Biga", "iso_1_code": null, "iso_3_code": "bhc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2480", + "scripts": [], + "own_tokenizer": false }, { "name": "Gebe", "iso_1_code": null, "iso_3_code": "gei", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2481", + "scripts": [], + "own_tokenizer": false }, { "name": "Kawe", "iso_1_code": null, "iso_3_code": "kgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2482", + "scripts": [], + "own_tokenizer": false }, { "name": "Legenyem", "iso_1_code": null, "iso_3_code": "lcc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2483", + "scripts": [], + "own_tokenizer": false }, { "name": "Ma\u2019ya", "iso_1_code": null, "iso_3_code": "slz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2484", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambel", "iso_1_code": null, "iso_3_code": "wgo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2485", + "scripts": [], + "own_tokenizer": false }, { "name": "Wauyai", "iso_1_code": null, "iso_3_code": "wuy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2486", + "scripts": [], + "own_tokenizer": false }, { "name": "Matbat", "iso_1_code": null, "iso_3_code": "xmt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2487", + "scripts": [], + "own_tokenizer": false }, { "name": "Salawati", "iso_1_code": null, "iso_3_code": "xmx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2488", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2478", + "scripts": [], + "own_tokenizer": false }, { "name": "Tandia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tandia", "iso_1_code": null, "iso_3_code": "tni", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2490", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2489", + "scripts": [], + "own_tokenizer": false }, { "name": "Waropen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Waropen", "iso_1_code": null, "iso_3_code": "wrp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2492", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2491", + "scripts": [], + "own_tokenizer": false }, { "name": "Yapen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central-Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ambai", "iso_1_code": null, "iso_3_code": "amk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2495", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ansus", "iso_1_code": null, "iso_3_code": "and", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2496", + "scripts": [], + "own_tokenizer": false }, { "name": "Busami", "iso_1_code": null, "iso_3_code": "bsm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2497", + "scripts": [], + "own_tokenizer": false }, { "name": "Munggui", "iso_1_code": null, "iso_3_code": "mth", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2498", + "scripts": [], + "own_tokenizer": false }, { "name": "Marau", "iso_1_code": null, "iso_3_code": "mvr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2499", + "scripts": [], + "own_tokenizer": false }, { "name": "Pom", "iso_1_code": null, "iso_3_code": "pmo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2500", + "scripts": [], + "own_tokenizer": false }, { "name": "Papuma", "iso_1_code": null, "iso_3_code": "ppm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2501", + "scripts": [], + "own_tokenizer": false }, { "name": "Roon", "iso_1_code": null, "iso_3_code": "rnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2502", + "scripts": [], + "own_tokenizer": false }, { "name": "Serui-Laut", "iso_1_code": null, "iso_3_code": "seu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2503", + "scripts": [], + "own_tokenizer": false }, { "name": "Wamesa", "iso_1_code": null, "iso_3_code": "wad", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2504", + "scripts": [], + "own_tokenizer": false }, { "name": "Woi", "iso_1_code": null, "iso_3_code": "wbw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2505", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2494", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kurudu", "iso_1_code": null, "iso_3_code": "kjr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2507", + "scripts": [], + "own_tokenizer": false }, { "name": "Wabo", "iso_1_code": null, "iso_3_code": "wbb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2508", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2506", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2493", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yaur", "iso_1_code": null, "iso_3_code": "jau", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2510", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2509", + "scripts": [], + "own_tokenizer": false }, { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": "gop", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2512", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2511", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2469", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2465", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2454", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1738", + "scripts": [], + "own_tokenizer": false }, { "name": "Hukumina", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hukumina", "iso_1_code": null, "iso_3_code": "huw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2514", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2513", + "scripts": [], + "own_tokenizer": false }, { "name": "North Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Arguni", "iso_1_code": null, "iso_3_code": "agf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2516", + "scripts": [], + "own_tokenizer": false }, { "name": "Onin", "iso_1_code": null, "iso_3_code": "oni", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2517", + "scripts": [], + "own_tokenizer": false }, { "name": "Sekar", "iso_1_code": null, "iso_3_code": "skz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2518", + "scripts": [], + "own_tokenizer": false }, { "name": "Uruangnirin", "iso_1_code": null, "iso_3_code": "urn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2519", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2515", + "scripts": [], + "own_tokenizer": false }, { "name": "South Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kowiai", "iso_1_code": null, "iso_3_code": "kwh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2520", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kei-Tanimbar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kei-Fordata", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Fordata", "iso_1_code": null, "iso_3_code": "frd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2525", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kei", "iso_1_code": null, "iso_3_code": "kei", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2526", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2524", + "scripts": [], + "own_tokenizer": false }, { "name": "Yamdena", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yamdena", "iso_1_code": null, "iso_3_code": "jmd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2528", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2527", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2523", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Selaru", "iso_1_code": null, "iso_3_code": "slu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2530", + "scripts": [], + "own_tokenizer": false }, { "name": "Seluwasan", "iso_1_code": null, "iso_3_code": "sws", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2531", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2529", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2522", + "scripts": [], + "own_tokenizer": false }, { "name": "Sumba-Hawu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hawu-Dhao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hawu", "iso_1_code": null, "iso_3_code": "hvn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2534", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dhao", "iso_1_code": null, "iso_3_code": "nfa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2535", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2533", + "scripts": [], + "own_tokenizer": false }, { "name": "Sumba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Anakalangu", "iso_1_code": null, "iso_3_code": "akg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2537", + "scripts": [], + "own_tokenizer": false }, { "name": "Kodi", "iso_1_code": null, "iso_3_code": "kod", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2538", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamboya", "iso_1_code": null, "iso_3_code": "lmy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2539", + "scripts": [], + "own_tokenizer": false }, { "name": "Loura", "iso_1_code": null, "iso_3_code": "lur", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2540", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamboru", "iso_1_code": null, "iso_3_code": "mvd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2541", + "scripts": [], + "own_tokenizer": false }, { "name": "Wejewa", "iso_1_code": null, "iso_3_code": "wew", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2542", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wanukaka", "iso_1_code": null, "iso_3_code": "wnk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2543", + "scripts": [], + "own_tokenizer": false }, { "name": "Kambera", "iso_1_code": null, "iso_3_code": "xbr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2544", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2536", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2532", + "scripts": [], + "own_tokenizer": false }, { "name": "Teor-Kur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kur", "iso_1_code": null, "iso_3_code": "kuv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2546", + "scripts": [], + "own_tokenizer": false }, { "name": "Teor", "iso_1_code": null, "iso_3_code": "tev", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2547", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2545", + "scripts": [], + "own_tokenizer": false }, { "name": "Timor-Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuclear Timor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Galolen", "iso_1_code": null, "iso_3_code": "gal", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2550", + "scripts": [], + "own_tokenizer": false }, { "name": "Habun", "iso_1_code": null, "iso_3_code": "hbu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2551", + "scripts": [], + "own_tokenizer": false }, { "name": "Helong", "iso_1_code": null, "iso_3_code": "heg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2552", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Idat\u00e9", "iso_1_code": null, "iso_3_code": "idt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2553", + "scripts": [], + "own_tokenizer": false }, { "name": "Kemak", "iso_1_code": null, "iso_3_code": "kem", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2554", + "scripts": [], + "own_tokenizer": false }, { "name": "Kairui-Midiki", "iso_1_code": null, "iso_3_code": "krd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2555", + "scripts": [], + "own_tokenizer": false }, { "name": "Lakalei", "iso_1_code": null, "iso_3_code": "lka", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2556", + "scripts": [], + "own_tokenizer": false }, { "name": "Makuva", "iso_1_code": null, "iso_3_code": "lva", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2557", + "scripts": [], + "own_tokenizer": false }, { "name": "Mambae", "iso_1_code": null, "iso_3_code": "mgm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2558", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nauete", "iso_1_code": null, "iso_3_code": "nxa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2559", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetun", "iso_1_code": null, "iso_3_code": "tet", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2560", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tukudede", "iso_1_code": null, "iso_3_code": "tkd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2561", + "scripts": [], + "own_tokenizer": false }, { "name": "Welaun", "iso_1_code": null, "iso_3_code": "wlh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2562", + "scripts": [], + "own_tokenizer": false }, { "name": "Waima\u2019a", "iso_1_code": null, "iso_3_code": "wmh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2563", + "scripts": [], + "own_tokenizer": false }, { "name": "Rote", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bilba", "iso_1_code": null, "iso_3_code": "bpz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2565", + "scripts": [], + "own_tokenizer": false }, { "name": "Dengka", "iso_1_code": null, "iso_3_code": "dnk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2566", + "scripts": [], + "own_tokenizer": false }, { "name": "Lole", "iso_1_code": null, "iso_3_code": "llg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2567", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Rikou", "iso_1_code": null, "iso_3_code": "rgu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2568", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dela-Oenale", "iso_1_code": null, "iso_3_code": "row", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2569", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Termanu", "iso_1_code": null, "iso_3_code": "twu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2570", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tii", "iso_1_code": null, "iso_3_code": "txq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2571", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2564", + "scripts": [], + "own_tokenizer": false }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amarasi", "iso_1_code": null, "iso_3_code": "aaz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2573", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": "aoz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2574", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Baikeno", "iso_1_code": null, "iso_3_code": "bkx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2575", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2572", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2549", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Damar, East", "iso_1_code": null, "iso_3_code": "dmr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2578", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2577", + "scripts": [], + "own_tokenizer": false }, { "name": "Kisar-Roma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kisar", "iso_1_code": null, "iso_3_code": "kje", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2580", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Roma", "iso_1_code": null, "iso_3_code": "rmm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2581", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2579", + "scripts": [], + "own_tokenizer": false }, { "name": "Luang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Luang", "iso_1_code": null, "iso_3_code": "lex", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2583", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Leti", "iso_1_code": null, "iso_3_code": "lti", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2584", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2582", + "scripts": [], + "own_tokenizer": false }, { "name": "Teun-Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nila", "iso_1_code": null, "iso_3_code": "nil", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2587", + "scripts": [], + "own_tokenizer": false }, { "name": "Serua", "iso_1_code": null, "iso_3_code": "srw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2588", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2586", + "scripts": [], + "own_tokenizer": false }, { "name": "Teun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Te\u2019un", "iso_1_code": null, "iso_3_code": "tve", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2590", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2589", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2585", + "scripts": [], + "own_tokenizer": false }, { "name": "Wetar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Atauran", "iso_1_code": null, "iso_3_code": "adb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2592", + "scripts": [], + "own_tokenizer": false }, { "name": "Aputai", "iso_1_code": null, "iso_3_code": "apx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2593", + "scripts": [], + "own_tokenizer": false }, { "name": "Ili\u2019uun", "iso_1_code": null, "iso_3_code": "ilu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2594", + "scripts": [], + "own_tokenizer": false }, { "name": "Tugun", "iso_1_code": null, "iso_3_code": "tzn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2595", + "scripts": [], + "own_tokenizer": false }, { "name": "Perai", "iso_1_code": null, "iso_3_code": "wet", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2576", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2548", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kuri", "iso_1_code": null, "iso_3_code": "nbn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2598", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2597", + "scripts": [], + "own_tokenizer": false }, { "name": "West Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Damar, West", "iso_1_code": null, "iso_3_code": "drn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2600", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2599", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1589", + "scripts": [], + "own_tokenizer": false }, { "name": "Chamorro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chamorro", "iso_1_code": "ch", "iso_3_code": "cha", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2602", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2601", + "scripts": [], + "own_tokenizer": false }, { "name": "Greater Barito", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Barito-Mahakam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ampanang", "iso_1_code": null, "iso_3_code": "apg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2605", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunjung", "iso_1_code": null, "iso_3_code": "tjg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2606", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2604", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central-South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dusun Deyah", "iso_1_code": null, "iso_3_code": "dun", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2610", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2609", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dusun Malang", "iso_1_code": null, "iso_3_code": "duq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2612", + "scripts": [], + "own_tokenizer": false }, { "name": "Dusun Witu", "iso_1_code": null, "iso_3_code": "duw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2613", + "scripts": [], + "own_tokenizer": false }, { "name": "Ma\u2019anyan", "iso_1_code": null, "iso_3_code": "mhy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2614", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Paku", "iso_1_code": null, "iso_3_code": "pku", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2615", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2611", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2608", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Malagasy, Bara", "iso_1_code": "mg", "iso_3_code": "bhr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2617", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Northern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bmm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2618", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushi", "iso_1_code": null, "iso_3_code": "buc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2619", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Southern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bzc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2620", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Masikoro", "iso_1_code": "mg", "iso_3_code": "msh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2621", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Merina", "iso_1_code": "mg", "iso_3_code": "plt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2622", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malagasy, Sakalava", "iso_1_code": "mg", "iso_3_code": "skg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2623", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malagasy, Tandroy-Mahafaly", "iso_1_code": "mg", "iso_3_code": "tdx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2624", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malagasy, Tesaka", "iso_1_code": "mg", "iso_3_code": "tkg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2625", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Tanosy", "iso_1_code": "mg", "iso_3_code": "txy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2626", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy, Antankarana", "iso_1_code": "mg", "iso_3_code": "xmv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2627", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malagasy, Tsimihety", "iso_1_code": "mg", "iso_3_code": "xmw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2628", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2616", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lawangan", "iso_1_code": null, "iso_3_code": "lbx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2630", + "scripts": [], + "own_tokenizer": false }, { "name": "Tawoyan", "iso_1_code": null, "iso_3_code": "twy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2631", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2629", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2607", + "scripts": [], + "own_tokenizer": false }, { "name": "Sama-Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yakan", "iso_1_code": null, "iso_3_code": "yka", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2633", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Abaknon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Inabaknon", "iso_1_code": null, "iso_3_code": "abx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2635", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2634", + "scripts": [], + "own_tokenizer": false }, { "name": "Sulu-Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Borneo Coast Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bajau, Indonesian", "iso_1_code": null, "iso_3_code": "bdl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2638", + "scripts": [], + "own_tokenizer": false }, { "name": "Bajau, West Coast", "iso_1_code": null, "iso_3_code": "bdr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2639", + "scripts": [], + "own_tokenizer": false }, { "name": "Mapun", "iso_1_code": null, "iso_3_code": "sjm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2637", + "scripts": [], + "own_tokenizer": false }, { "name": "Inner Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sama, Central", "iso_1_code": null, "iso_3_code": "sml", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2642", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sama, Southern", "iso_1_code": null, "iso_3_code": "ssb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2643", + "scripts": [], + "own_tokenizer": false }, { "name": "Sama, Balangingih", "iso_1_code": null, "iso_3_code": "sse", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2644", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2641", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sama, Pangutaran", "iso_1_code": null, "iso_3_code": "slm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2646", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2645", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2636", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2632", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kohin", "iso_1_code": null, "iso_3_code": "kkx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2649", + "scripts": [], + "own_tokenizer": false }, { "name": "Ot Danum", "iso_1_code": null, "iso_3_code": "otd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2650", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Siang", "iso_1_code": null, "iso_3_code": "sya", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2651", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2648", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bakumpai", "iso_1_code": null, "iso_3_code": "bkr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2653", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngaju", "iso_1_code": null, "iso_3_code": "nij", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2654", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2652", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2647", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2603", + "scripts": [], + "own_tokenizer": false }, { "name": "Greater Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Ata", "iso_1_code": null, "iso_3_code": "atm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2657", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayta, Sorsogon", "iso_1_code": null, "iso_3_code": "ays", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2658", + "scripts": [], + "own_tokenizer": false }, { "name": "Binukidnon, Northern", "iso_1_code": null, "iso_3_code": "kyn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2659", + "scripts": [], + "own_tokenizer": false }, { "name": "Binukidnon, Southern", "iso_1_code": null, "iso_3_code": "mtw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2660", + "scripts": [], + "own_tokenizer": false }, { "name": "Sulod", "iso_1_code": null, "iso_3_code": "srg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2661", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agta, Katubung", "iso_1_code": null, "iso_3_code": "agk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2665", + "scripts": [], + "own_tokenizer": false }, { "name": "Agta, Mt. Iraya", "iso_1_code": null, "iso_3_code": "atl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2666", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol, Central", "iso_1_code": null, "iso_3_code": "bcl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2667", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2664", + "scripts": [], + "own_tokenizer": false }, { "name": "Virac", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bikol, Southern Catanduanes", "iso_1_code": null, "iso_3_code": "bln", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2669", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2663", + "scripts": [], + "own_tokenizer": false }, { "name": "Inland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agta, Mt. Iriga", "iso_1_code": null, "iso_3_code": "agz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2671", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol, West Albay", "iso_1_code": null, "iso_3_code": "fbl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2672", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol, Libon", "iso_1_code": null, "iso_3_code": "lbl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2673", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol, Miraya", "iso_1_code": null, "iso_3_code": "rbl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2674", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikol, Buhi\u2019non", "iso_1_code": null, "iso_3_code": "ubl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2675", + "scripts": [], + "own_tokenizer": false }, { "name": "Iriga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bikol, Rinconada", "iso_1_code": null, "iso_3_code": "bto", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2677", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2676", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2670", + "scripts": [], + "own_tokenizer": false }, { "name": "Pandan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bikol, Northern Catanduanes", "iso_1_code": null, "iso_3_code": "cts", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2679", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2678", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2662", + "scripts": [], + "own_tokenizer": false }, { "name": "Bisayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Banton", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bantoanon", "iso_1_code": null, "iso_3_code": "bno", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2682", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2681", + "scripts": [], + "own_tokenizer": false }, { "name": "Cebuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cebuano", "iso_1_code": null, "iso_3_code": "ceb", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2684", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2683", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Bantayanon", "iso_1_code": null, "iso_3_code": "bfx", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2686", + "scripts": [], + "own_tokenizer": false }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ati", "iso_1_code": null, "iso_3_code": "atk", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2688", + "scripts": [], + "own_tokenizer": false }, { "name": "Capiznon", "iso_1_code": null, "iso_3_code": "cps", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2689", + "scripts": [], + "own_tokenizer": false }, { "name": "Hiligaynon", "iso_1_code": null, "iso_3_code": "hil", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2690", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Masbatenyo", "iso_1_code": null, "iso_3_code": "msb", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2691", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Porohanon", "iso_1_code": null, "iso_3_code": "prh", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2692", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2687", + "scripts": [], + "own_tokenizer": false }, { "name": "Romblon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Romblomanon", "iso_1_code": null, "iso_3_code": "rol", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2694", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2693", + "scripts": [], + "own_tokenizer": false }, { "name": "Warayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Sorsoganon, Northern", "iso_1_code": null, "iso_3_code": "bks", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2696", + "scripts": [], + "own_tokenizer": false }, { "name": "Baybayanon", "iso_1_code": null, "iso_3_code": "bvy", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2697", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinabalian", "iso_1_code": null, "iso_3_code": "cbw", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2698", + "scripts": [], + "own_tokenizer": false }, { "name": "Gubat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sorsoganon, Southern", "iso_1_code": null, "iso_3_code": "srv", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2700", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2699", + "scripts": [], + "own_tokenizer": false }, { "name": "Samar-Waray", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Waray-Waray", "iso_1_code": null, "iso_3_code": "war", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "2702", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "2701", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2695", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2685", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Surigaonon", "iso_1_code": null, "iso_3_code": "sgd", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2704", + "scripts": [], + "own_tokenizer": false }, { "name": "Tandaganon", "iso_1_code": null, "iso_3_code": "tgn", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2705", + "scripts": [], + "own_tokenizer": false }, { "name": "Butuan-Tausug", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Butuanon", "iso_1_code": null, "iso_3_code": "btw", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2707", + "scripts": [], + "own_tokenizer": false }, { "name": "Tausug", "iso_1_code": null, "iso_3_code": "tsg", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2708", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2706", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2703", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } + }, "children": [ { "name": "Caluyanun", "iso_1_code": null, "iso_3_code": "clu", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2710", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Aklan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aklanon", "iso_1_code": null, "iso_3_code": "akl", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2712", + "scripts": [], + "own_tokenizer": false }, { "name": "Malaynon", "iso_1_code": null, "iso_3_code": "mlz", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2713", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2711", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinarayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kinaray-a", "iso_1_code": null, "iso_3_code": "krj", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2715", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2714", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ratagnon", "iso_1_code": null, "iso_3_code": "btn", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2717", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuyonon", "iso_1_code": null, "iso_3_code": "cyo", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2718", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2716", + "scripts": [], + "own_tokenizer": false }, { "name": "North Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Inonhan", "iso_1_code": null, "iso_3_code": "loc", - "tokenizer": { - "name": "war", - "tokenizer": "IndicNLPTokenizer(\"war\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2720", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2719", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2709", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2680", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamanwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Minamanwa", "iso_1_code": null, "iso_3_code": "mmn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2722", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2721", + "scripts": [], + "own_tokenizer": false }, { "name": "Mansakan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": "daw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2725", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2724", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mandaya", "iso_1_code": null, "iso_3_code": "mry", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2727", + "scripts": [], + "own_tokenizer": false }, { "name": "Mansaka", "iso_1_code": null, "iso_3_code": "msk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2728", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2726", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kamayo", "iso_1_code": null, "iso_3_code": "kyk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2730", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2729", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tagakolu", "iso_1_code": null, "iso_3_code": "klg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2732", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalagan, Kagan", "iso_1_code": null, "iso_3_code": "kll", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2733", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalagan", "iso_1_code": null, "iso_3_code": "kqe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2734", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2731", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2723", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagalog", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Filipino", "iso_1_code": null, "iso_3_code": "fil", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2736", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tagalog", "iso_1_code": "tl", "iso_3_code": "tgl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "own", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2737", + "scripts": [], + "own_tokenizer": true } - ] + ], + "node_i": "2735", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2656", + "scripts": [], + "own_tokenizer": false }, { "name": "Danao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Magindanao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Maguindanaon", "iso_1_code": null, "iso_3_code": "mdh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2740", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2739", + "scripts": [], + "own_tokenizer": false }, { "name": "Maranao-Iranon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2742", + "scripts": [], + "own_tokenizer": false }, { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2743", + "scripts": [], + "own_tokenizer": false }, { "name": "Maranao", "iso_1_code": null, "iso_3_code": "mrw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2744", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2741", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2738", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorontalo-Mongondow", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gorontalic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bolango", "iso_1_code": null, "iso_3_code": "bld", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2747", + "scripts": [], + "own_tokenizer": false }, { "name": "Buol", "iso_1_code": null, "iso_3_code": "blf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2748", + "scripts": [], + "own_tokenizer": false }, { "name": "Bintauna", "iso_1_code": null, "iso_3_code": "bne", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2749", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorontalo", "iso_1_code": null, "iso_3_code": "gor", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2750", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaidipang", "iso_1_code": null, "iso_3_code": "kzp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2751", + "scripts": [], + "own_tokenizer": false }, { "name": "Lolak", "iso_1_code": null, "iso_3_code": "llq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2752", + "scripts": [], + "own_tokenizer": false }, { "name": "Suwawa", "iso_1_code": null, "iso_3_code": "swu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2753", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2746", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongondowic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mongondow", "iso_1_code": null, "iso_3_code": "mog", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2755", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ponosakan", "iso_1_code": null, "iso_3_code": "pns", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2756", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2745", + "scripts": [], + "own_tokenizer": false }, { "name": "Manobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manobo, Dibabawon", "iso_1_code": null, "iso_3_code": "mbd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2760", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manobo, Rajah Kabunsuwan", "iso_1_code": null, "iso_3_code": "mqk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2761", + "scripts": [], + "own_tokenizer": false }, { "name": "Manobo, Agusan", "iso_1_code": null, "iso_3_code": "msm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2762", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2759", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ata-Tigwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manobo, Ata", "iso_1_code": null, "iso_3_code": "atd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2765", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manobo, Matigsalug", "iso_1_code": null, "iso_3_code": "mbt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2766", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2764", + "scripts": [], + "own_tokenizer": false }, { "name": "Obo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manobo, Obo", "iso_1_code": null, "iso_3_code": "obo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2768", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2767", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2763", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manobo, Western Bukidnon", "iso_1_code": null, "iso_3_code": "mbb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2770", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manobo, Ilianen", "iso_1_code": null, "iso_3_code": "mbi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2771", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2769", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2758", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Binukid", "iso_1_code": null, "iso_3_code": "bkd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2773", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kagayanen", "iso_1_code": null, "iso_3_code": "cgc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2774", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Higaonon", "iso_1_code": null, "iso_3_code": "mba", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2775", + "scripts": [], + "own_tokenizer": false }, { "name": "Manobo, Kinamiging", "iso_1_code": null, "iso_3_code": "mkx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2776", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2772", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tagabawa", "iso_1_code": null, "iso_3_code": "bgs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2778", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manobo, Sarangani", "iso_1_code": null, "iso_3_code": "mbs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2779", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manobo, Cotabato", "iso_1_code": null, "iso_3_code": "mta", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2780", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2777", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2757", + "scripts": [], + "own_tokenizer": false }, { "name": "Palawanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bonggi", "iso_1_code": null, "iso_3_code": "bdg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2782", + "scripts": [], + "own_tokenizer": false }, { "name": "Batak", "iso_1_code": null, "iso_3_code": "bya", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2783", + "scripts": [], + "own_tokenizer": false }, { "name": "Palawano, Central", "iso_1_code": null, "iso_3_code": "plc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2784", + "scripts": [], + "own_tokenizer": false }, { "name": "Palawano, Southwest", "iso_1_code": null, "iso_3_code": "plv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2785", + "scripts": [], + "own_tokenizer": false }, { "name": "Palawano, Brooke\u2019s Point", "iso_1_code": null, "iso_3_code": "plw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2786", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Molbog", "iso_1_code": null, "iso_3_code": "pwm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2787", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagbanwa", "iso_1_code": null, "iso_3_code": "tbw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2788", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tagbanwa, Central", "iso_1_code": null, "iso_3_code": "tgt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2789", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2781", + "scripts": [], + "own_tokenizer": false }, { "name": "South Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buhid-Taubuid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Buhid", "iso_1_code": null, "iso_3_code": "bku", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2792", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bangon", "iso_1_code": null, "iso_3_code": "bnj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2793", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tawbuid", "iso_1_code": null, "iso_3_code": "twb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2794", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2791", + "scripts": [], + "own_tokenizer": false }, { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": "hnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2796", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2795", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2790", + "scripts": [], + "own_tokenizer": false }, { "name": "Subanon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Subanon, Western", "iso_1_code": null, "iso_3_code": "suc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2798", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Subanen, Southern", "iso_1_code": null, "iso_3_code": "laa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2800", + "scripts": [], + "own_tokenizer": false }, { "name": "Subanen, Eastern", "iso_1_code": null, "iso_3_code": "sfe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2801", + "scripts": [], + "own_tokenizer": false }, { "name": "Subanon, Kolibugan", "iso_1_code": null, "iso_3_code": "skn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2802", + "scripts": [], + "own_tokenizer": false }, { "name": "Subanen, Northern", "iso_1_code": null, "iso_3_code": "stb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2803", + "scripts": [], + "own_tokenizer": false }, { "name": "Subanen, Central", "iso_1_code": null, "iso_3_code": "syb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2804", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2799", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2797", + "scripts": [], + "own_tokenizer": false }, { "name": "Umiray Dumaget", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Manide", "iso_1_code": null, "iso_3_code": "abd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2806", + "scripts": [], + "own_tokenizer": false }, { "name": "Agta, Umiray Dumaget", "iso_1_code": null, "iso_3_code": "due", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"war\")", + "original_lang_name": "war", + "original_lang_code": "war", + "scripts": [ + "Latn" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2807", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Inagta Alabat", "iso_1_code": null, "iso_3_code": "dul", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2808", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2805", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2655", + "scripts": [], + "own_tokenizer": false }, { "name": "Javanese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Javanese, New Caledonian", "iso_1_code": null, "iso_3_code": "jas", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2810", + "scripts": [], + "own_tokenizer": false }, { "name": "Javanese", "iso_1_code": "jv", "iso_3_code": "jav", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2811", + "scripts": [ + "Latn", + "Java" + ], + "own_tokenizer": false }, { "name": "Javanese, Suriname", "iso_1_code": null, "iso_3_code": "jvn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2812", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Osing", "iso_1_code": null, "iso_3_code": "osi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2813", + "scripts": [], + "own_tokenizer": false }, { "name": "Tengger", "iso_1_code": null, "iso_3_code": "tes", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2814", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2809", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalamian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agutaynen", "iso_1_code": null, "iso_3_code": "agn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2816", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tagbanwa, Calamian", "iso_1_code": null, "iso_3_code": "tbk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2817", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2815", + "scripts": [], + "own_tokenizer": false }, { "name": "Lampung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lampung Nyo", "iso_1_code": null, "iso_3_code": "abl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2819", + "scripts": [], + "own_tokenizer": false }, { "name": "Komering", "iso_1_code": null, "iso_3_code": "kge", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2820", + "scripts": [], + "own_tokenizer": false }, { "name": "Lampung Api", "iso_1_code": null, "iso_3_code": "ljp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2821", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2818", + "scripts": [], + "own_tokenizer": false }, { "name": "Land Dayak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Benyadu\u2019", "iso_1_code": null, "iso_3_code": "byd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2823", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanggau", "iso_1_code": null, "iso_3_code": "scg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2824", + "scripts": [], + "own_tokenizer": false }, { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": "bei", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2826", + "scripts": [], + "own_tokenizer": false }, { "name": "Bakati\u2019, Rara", "iso_1_code": null, "iso_3_code": "lra", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2827", + "scripts": [], + "own_tokenizer": false }, { "name": "Bakati\u2019, Sara", "iso_1_code": null, "iso_3_code": "sre", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2828", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2825", + "scripts": [], + "own_tokenizer": false }, { "name": "Bidayuh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Core", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bidayuh, Biatah", "iso_1_code": null, "iso_3_code": "bth", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2832", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2831", + "scripts": [], + "own_tokenizer": false }, { "name": "Sembaan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bidayuh, Tringgus-Sembaan", "iso_1_code": null, "iso_3_code": "trx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2834", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2833", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bidayuh, Bau", "iso_1_code": null, "iso_3_code": "sne", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2836", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2835", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2830", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bidayuh Serian", "iso_1_code": null, "iso_3_code": "sdo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2838", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2837", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2829", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Jangkang", "iso_1_code": null, "iso_3_code": "djo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2840", + "scripts": [], + "own_tokenizer": false }, { "name": "Beginci", "iso_1_code": null, "iso_3_code": "ebc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2841", + "scripts": [], + "own_tokenizer": false }, { "name": "Gerai", "iso_1_code": null, "iso_3_code": "gef", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2842", + "scripts": [], + "own_tokenizer": false }, { "name": "Ribun", "iso_1_code": null, "iso_3_code": "rir", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2843", + "scripts": [], + "own_tokenizer": false }, { "name": "Semandang", "iso_1_code": null, "iso_3_code": "sdq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2844", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mateq", "iso_1_code": null, "iso_3_code": "xem", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2845", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2839", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2822", + "scripts": [], + "own_tokenizer": false }, { "name": "Madurese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kangean", "iso_1_code": null, "iso_3_code": "kkv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2847", + "scripts": [], + "own_tokenizer": false }, { "name": "Madura", "iso_1_code": null, "iso_3_code": "mad", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2848", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2846", + "scripts": [], + "own_tokenizer": false }, { "name": "Malayo-Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Acehnese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aceh", "iso_1_code": null, "iso_3_code": "ace", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2852", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2851", + "scripts": [], + "own_tokenizer": false }, { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Haroi", "iso_1_code": null, "iso_3_code": "hro", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2854", + "scripts": [], + "own_tokenizer": false }, { "name": "Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cham, Western", "iso_1_code": null, "iso_3_code": "cja", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2856", + "scripts": [], + "own_tokenizer": false }, { "name": "Cham, Eastern", "iso_1_code": null, "iso_3_code": "cjm", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2857", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2855", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2853", + "scripts": [], + "own_tokenizer": false }, { "name": "Highlands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bih", "iso_1_code": null, "iso_3_code": "ibh", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2859", + "scripts": [], + "own_tokenizer": false }, { "name": "Jarai", "iso_1_code": null, "iso_3_code": "jra", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2860", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Rade", "iso_1_code": null, "iso_3_code": "rad", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2861", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chru-Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chru", "iso_1_code": null, "iso_3_code": "cje", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2863", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tsat", "iso_1_code": null, "iso_3_code": "huq", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2865", + "scripts": [], + "own_tokenizer": false }, { "name": "Roglai, Southern", "iso_1_code": null, "iso_3_code": "rgs", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2866", + "scripts": [], + "own_tokenizer": false }, { "name": "Roglai, Cacgia", "iso_1_code": null, "iso_3_code": "roc", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2867", + "scripts": [], + "own_tokenizer": false }, { "name": "Roglai, Northern", "iso_1_code": null, "iso_3_code": "rog", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2868", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2864", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2862", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2858", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2850", + "scripts": [], + "own_tokenizer": false }, { "name": "Malayic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Keninjal", "iso_1_code": null, "iso_3_code": "knl", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2870", + "scripts": [], + "own_tokenizer": false }, { "name": "Kendayan", "iso_1_code": null, "iso_3_code": "knx", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2871", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Urak Lawoi\u2019", "iso_1_code": "ms", "iso_3_code": "urk", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2872", + "scripts": [ + "Thai" + ], + "own_tokenizer": true }, { "name": "Malayic Dayak", "iso_1_code": null, "iso_3_code": "xdy", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2873", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Iban", "iso_1_code": null, "iso_3_code": "iba", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2875", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Remun", "iso_1_code": null, "iso_3_code": "lkj", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2876", + "scripts": [], + "own_tokenizer": false }, { "name": "Mualang", "iso_1_code": null, "iso_3_code": "mtd", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2877", + "scripts": [], + "own_tokenizer": false }, { "name": "Seberuang", "iso_1_code": null, "iso_3_code": "sbx", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2878", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2874", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Banjar", "iso_1_code": "ms", "iso_3_code": "bjn", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2880", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": true }, { "name": "Malay, Bacanese", "iso_1_code": "ms", "iso_3_code": "btj", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2881", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Berau", "iso_1_code": "ms", "iso_3_code": "bve", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2882", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Bukit", "iso_1_code": "ms", "iso_3_code": "bvu", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2883", + "scripts": [], + "own_tokenizer": true }, { "name": "Duano", "iso_1_code": "ms", "iso_3_code": "dup", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2884", + "scripts": [], + "own_tokenizer": true }, { "name": "Haji", "iso_1_code": "ms", "iso_3_code": "hji", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2885", + "scripts": [], + "own_tokenizer": true }, { "name": "Indonesian", "iso_1_code": "id", "iso_3_code": "ind", - "tokenizer": { - "name": "indonesian", - "tokenizer": "SpaCyTokenizer(\"id\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "2886", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Jakun", "iso_1_code": "ms", "iso_3_code": "jak", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2887", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Jambi", "iso_1_code": "ms", "iso_3_code": "jax", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2888", + "scripts": [], + "own_tokenizer": true }, { "name": "Kubu", "iso_1_code": "ms", "iso_3_code": "kvb", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2889", + "scripts": [], + "own_tokenizer": true }, { "name": "Kerinci", "iso_1_code": "ms", "iso_3_code": "kvr", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2890", + "scripts": [], + "own_tokenizer": true }, { "name": "Brunei", "iso_1_code": "ms", "iso_3_code": "kxd", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2891", + "scripts": [], + "own_tokenizer": true }, { "name": "Sekak", "iso_1_code": "ms", "iso_3_code": "lce", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2892", + "scripts": [], + "own_tokenizer": true }, { "name": "Lubu", "iso_1_code": "ms", "iso_3_code": "lcf", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2893", + "scripts": [], + "own_tokenizer": true }, { "name": "Col", "iso_1_code": "ms", "iso_3_code": "liw", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2894", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Kedah", "iso_1_code": "ms", "iso_3_code": "meo", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2895", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Pattani", "iso_1_code": "ms", "iso_3_code": "mfa", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2896", + "scripts": [], + "own_tokenizer": true }, { "name": "Bangka", "iso_1_code": "ms", "iso_3_code": "mfb", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2897", + "scripts": [], + "own_tokenizer": true }, { "name": "Indonesian, Makassar", "iso_1_code": null, "iso_3_code": "mfp", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2898", + "scripts": [], + "own_tokenizer": false }, { "name": "Minangkabau", "iso_1_code": "ms", "iso_3_code": "min", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2899", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": true }, { "name": "Malay, Kota Bangun Kutai", "iso_1_code": "ms", "iso_3_code": "mqg", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2900", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Sabah", "iso_1_code": "ms", "iso_3_code": "msi", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2901", + "scripts": [], + "own_tokenizer": true }, { "name": "Musi", "iso_1_code": "ms", "iso_3_code": "mui", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2902", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Orang Kanaq", "iso_1_code": "ms", "iso_3_code": "orn", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2903", + "scripts": [], + "own_tokenizer": true }, { "name": "Orang Seletar", "iso_1_code": "ms", "iso_3_code": "ors", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2904", + "scripts": [], + "own_tokenizer": true }, { "name": "Pekal", "iso_1_code": "ms", "iso_3_code": "pel", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2905", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Central", "iso_1_code": "ms", "iso_3_code": "pse", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2906", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Temuan", "iso_1_code": "ms", "iso_3_code": "tmw", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2907", + "scripts": [], + "own_tokenizer": true }, { "name": "Kaur", "iso_1_code": "ms", "iso_3_code": "vkk", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2908", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Tenggarong Kutai", "iso_1_code": "ms", "iso_3_code": "vkt", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2909", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay", "iso_1_code": "ms", "iso_3_code": "zlm", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2910", + "scripts": [], + "own_tokenizer": true }, { "name": "Negeri Sembilan Malay", "iso_1_code": "ms", "iso_3_code": "zmi", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2911", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Standard", "iso_1_code": "ms", "iso_3_code": "zsm", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "2912", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": true } - ] + ], + "node_i": "2879", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2869", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2849", + "scripts": [], + "own_tokenizer": false }, { "name": "Minahasan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tonsawang", "iso_1_code": null, "iso_3_code": "tnw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2914", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tontemboan", "iso_1_code": null, "iso_3_code": "tnt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2916", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tondano", "iso_1_code": null, "iso_3_code": "tdn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2918", + "scripts": [], + "own_tokenizer": false }, { "name": "Tombulu", "iso_1_code": null, "iso_3_code": "tom", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2919", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonsea", "iso_1_code": null, "iso_3_code": "txs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2920", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2917", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2915", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2913", + "scripts": [], + "own_tokenizer": false }, { "name": "Moklen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Moklen", "iso_1_code": null, "iso_3_code": "mkm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2922", + "scripts": [], + "own_tokenizer": false }, { "name": "Moken", "iso_1_code": null, "iso_3_code": "mwt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2923", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2921", + "scripts": [], + "own_tokenizer": false }, { "name": "Nasal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nasal", "iso_1_code": null, "iso_3_code": "nsy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2925", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2924", + "scripts": [], + "own_tokenizer": false }, { "name": "North Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Melanau-Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bukitan", "iso_1_code": null, "iso_3_code": "bkn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2929", + "scripts": [], + "own_tokenizer": false }, { "name": "Kajaman", "iso_1_code": null, "iso_3_code": "kag", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2930", + "scripts": [], + "own_tokenizer": false }, { "name": "Lahanan", "iso_1_code": null, "iso_3_code": "lhn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2931", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Batu", "iso_1_code": null, "iso_3_code": "pnm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2932", + "scripts": [], + "own_tokenizer": false }, { "name": "Sekapan", "iso_1_code": null, "iso_3_code": "skp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2933", + "scripts": [], + "own_tokenizer": false }, { "name": "Sihan", "iso_1_code": null, "iso_3_code": "spg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2934", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukit", "iso_1_code": null, "iso_3_code": "umi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2935", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2928", + "scripts": [], + "own_tokenizer": false }, { "name": "Melanau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Melanau, Daro-Matu", "iso_1_code": null, "iso_3_code": "dro", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2937", + "scripts": [], + "own_tokenizer": false }, { "name": "Melanau, Kanowit-Tanjong", "iso_1_code": null, "iso_3_code": "kxn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2938", + "scripts": [], + "own_tokenizer": false }, { "name": "Melanau, Central", "iso_1_code": null, "iso_3_code": "mel", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2939", + "scripts": [], + "own_tokenizer": false }, { "name": "Melanau, Sibu", "iso_1_code": null, "iso_3_code": "sdx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2940", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2936", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2927", + "scripts": [], + "own_tokenizer": false }, { "name": "North Sarawakan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Berawan-Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Berawan, West", "iso_1_code": null, "iso_3_code": "zbw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2944", + "scripts": [], + "own_tokenizer": false }, { "name": "Central-East Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Berawan, Central", "iso_1_code": null, "iso_3_code": "zbc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2946", + "scripts": [], + "own_tokenizer": false }, { "name": "Berawan, East", "iso_1_code": null, "iso_3_code": "zbe", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2947", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2945", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2943", + "scripts": [], + "own_tokenizer": false }, { "name": "Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Belait", "iso_1_code": null, "iso_3_code": "beg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2951", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiput", "iso_1_code": null, "iso_3_code": "kyi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2952", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2950", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lelak", "iso_1_code": null, "iso_3_code": "llk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2954", + "scripts": [], + "own_tokenizer": false }, { "name": "Narom", "iso_1_code": null, "iso_3_code": "nrm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2955", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tutong", "iso_1_code": null, "iso_3_code": "ttg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2956", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2953", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2949", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2942", + "scripts": [], + "own_tokenizer": false }, { "name": "Bintulu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Vaie", "iso_1_code": null, "iso_3_code": "bny", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2958", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2957", + "scripts": [], + "own_tokenizer": false }, { "name": "Dayic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kelabitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kelabit", "iso_1_code": null, "iso_3_code": "kzi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2961", + "scripts": [], + "own_tokenizer": false }, { "name": "Lengilu", "iso_1_code": null, "iso_3_code": "lgi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2962", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundayeh", "iso_1_code": null, "iso_3_code": "lnd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2963", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Putoh", "iso_1_code": null, "iso_3_code": "put", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2964", + "scripts": [], + "own_tokenizer": false }, { "name": "Sa\u2019ban", "iso_1_code": null, "iso_3_code": "snv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2965", + "scripts": [], + "own_tokenizer": false }, { "name": "Tring", "iso_1_code": null, "iso_3_code": "tgq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2966", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2959", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan-Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayan Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kayan, Busang", "iso_1_code": null, "iso_3_code": "bfg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2970", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahau", "iso_1_code": null, "iso_3_code": "bhv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2971", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan, Baram", "iso_1_code": null, "iso_3_code": "kys", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2972", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan, Rejang", "iso_1_code": null, "iso_3_code": "ree", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2973", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan, Wahau", "iso_1_code": null, "iso_3_code": "whu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2974", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan Mahakam", "iso_1_code": null, "iso_3_code": "xay", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2975", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan, Mendalam", "iso_1_code": null, "iso_3_code": "xkd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2976", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan, Kayan River", "iso_1_code": null, "iso_3_code": "xkn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2977", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2969", + "scripts": [], + "own_tokenizer": false }, { "name": "Modang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Modang", "iso_1_code": null, "iso_3_code": "mxd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2979", + "scripts": [], + "own_tokenizer": false }, { "name": "Segai", "iso_1_code": null, "iso_3_code": "sge", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2980", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2978", + "scripts": [], + "own_tokenizer": false }, { "name": "Muller-Schwaner \u2018Punan\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bukat", "iso_1_code": null, "iso_3_code": "bvk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2982", + "scripts": [], + "own_tokenizer": false }, { "name": "Hovongan", "iso_1_code": null, "iso_3_code": "hov", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2983", + "scripts": [], + "own_tokenizer": false }, { "name": "Aoheng", "iso_1_code": null, "iso_3_code": "pni", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2984", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Aput", "iso_1_code": null, "iso_3_code": "pud", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2985", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Merah", "iso_1_code": null, "iso_3_code": "puf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2986", + "scripts": [], + "own_tokenizer": false }, { "name": "Kereho", "iso_1_code": null, "iso_3_code": "xke", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2987", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2981", + "scripts": [], + "own_tokenizer": false }, { "name": "Murik Kayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Murik", "iso_1_code": null, "iso_3_code": "mxr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2989", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2988", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2968", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kenyah, Mainstream", "iso_1_code": null, "iso_3_code": "xkl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2991", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayanic Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sebop", "iso_1_code": null, "iso_3_code": "sib", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2993", + "scripts": [], + "own_tokenizer": false }, { "name": "Long Wat", "iso_1_code": null, "iso_3_code": "ttw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2994", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenyah, Wahau", "iso_1_code": null, "iso_3_code": "whk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "2995", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2992", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Pujungan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Uma\u2019 Lung", "iso_1_code": null, "iso_3_code": "ulu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2997", + "scripts": [], + "own_tokenizer": false }, { "name": "Uma\u2019 Lasan", "iso_1_code": null, "iso_3_code": "xky", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "2998", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2996", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2990", + "scripts": [], + "own_tokenizer": false }, { "name": "Penan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Penan, Eastern", "iso_1_code": null, "iso_3_code": "pez", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3000", + "scripts": [], + "own_tokenizer": false }, { "name": "Penan, Western", "iso_1_code": null, "iso_3_code": "pne", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3001", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "2999", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2967", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": "puj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3003", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3002", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2941", + "scripts": [], + "own_tokenizer": false }, { "name": "Rejang-Sajau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Basap", "iso_1_code": null, "iso_3_code": "bdb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3005", + "scripts": [], + "own_tokenizer": false }, { "name": "Burusu", "iso_1_code": null, "iso_3_code": "bqr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3006", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Bah-Biau", "iso_1_code": null, "iso_3_code": "pna", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3007", + "scripts": [], + "own_tokenizer": false }, { "name": "Punan Merap", "iso_1_code": null, "iso_3_code": "puc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3008", + "scripts": [], + "own_tokenizer": false }, { "name": "Sajau Basap", "iso_1_code": null, "iso_3_code": "sjb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3009", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3004", + "scripts": [], + "own_tokenizer": false }, { "name": "Sabahan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dusunic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bisaya-Lotud", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bisaya, Sabah", "iso_1_code": null, "iso_3_code": "bsy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3013", + "scripts": [], + "own_tokenizer": false }, { "name": "Lotud", "iso_1_code": null, "iso_3_code": "dtr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3014", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bisaya, Brunei", "iso_1_code": null, "iso_3_code": "bsb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3016", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3015", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3012", + "scripts": [], + "own_tokenizer": false }, { "name": "Dusun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kuijau", "iso_1_code": null, "iso_3_code": "dkr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3018", + "scripts": [], + "own_tokenizer": false }, { "name": "Rungus", "iso_1_code": null, "iso_3_code": "drg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3019", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kota Marudu Talantang", "iso_1_code": null, "iso_3_code": "grm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3020", + "scripts": [], + "own_tokenizer": false }, { "name": "Kimaragang", "iso_1_code": null, "iso_3_code": "kqr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3021", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadazan, Klias River", "iso_1_code": null, "iso_3_code": "kqt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3022", + "scripts": [], + "own_tokenizer": false }, { "name": "Tobilung", "iso_1_code": null, "iso_3_code": "tgb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3023", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kadazan Dusun", "iso_1_code": null, "iso_3_code": "dtp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3025", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sugut Dusun", "iso_1_code": null, "iso_3_code": "kzs", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3026", + "scripts": [], + "own_tokenizer": false }, { "name": "Minokok", "iso_1_code": null, "iso_3_code": "mqq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3027", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3024", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kadazan, Labuk-Kinabatangan", "iso_1_code": null, "iso_3_code": "dtb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3029", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3028", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3017", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dumpas", "iso_1_code": null, "iso_3_code": "dmv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3031", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3030", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3011", + "scripts": [], + "own_tokenizer": false }, { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": "dbj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3033", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3032", + "scripts": [], + "own_tokenizer": false }, { "name": "Murutic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Murut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Okolod", "iso_1_code": null, "iso_3_code": "kqv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3036", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Keningau", "iso_1_code": null, "iso_3_code": "kxi", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3037", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Tahol", "iso_1_code": null, "iso_3_code": "mvv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3038", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Paluan", "iso_1_code": null, "iso_3_code": "plz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3039", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Selungai", "iso_1_code": null, "iso_3_code": "slg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3040", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Timugon", "iso_1_code": null, "iso_3_code": "tih", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3041", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3035", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Murut, Bookan", "iso_1_code": null, "iso_3_code": "bnb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3043", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3042", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Murut, Kalabakan", "iso_1_code": null, "iso_3_code": "kve", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3045", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Sembakung", "iso_1_code": null, "iso_3_code": "sbr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3046", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3044", + "scripts": [], + "own_tokenizer": false }, { "name": "Tidung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tidung, Southern", "iso_1_code": null, "iso_3_code": "itd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3048", + "scripts": [], + "own_tokenizer": false }, { "name": "Tidung, Northern", "iso_1_code": null, "iso_3_code": "ntd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3049", + "scripts": [], + "own_tokenizer": false }, { "name": "Murut, Serudung", "iso_1_code": null, "iso_3_code": "srk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3050", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3047", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Papar", "iso_1_code": null, "iso_3_code": "dpp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3052", + "scripts": [], + "own_tokenizer": false }, { "name": "Gana", "iso_1_code": null, "iso_3_code": "gnq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3053", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3051", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3034", + "scripts": [], + "own_tokenizer": false }, { "name": "Paitanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Abai Sungai", "iso_1_code": null, "iso_3_code": "abf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3055", + "scripts": [], + "own_tokenizer": false }, { "name": "Tombonuo", "iso_1_code": null, "iso_3_code": "txa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3056", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Kinabatangan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kinabatangan, Upper", "iso_1_code": null, "iso_3_code": "dmg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3058", + "scripts": [], + "own_tokenizer": false }, { "name": "Lobu, Tampias", "iso_1_code": null, "iso_3_code": "low", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3059", + "scripts": [], + "own_tokenizer": false }, { "name": "Lobu, Lanas", "iso_1_code": null, "iso_3_code": "ruu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3060", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3057", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3054", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tatana", "iso_1_code": null, "iso_3_code": "txx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3062", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3061", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3010", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "2926", + "scripts": [], + "own_tokenizer": false }, { "name": "North Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Alangan", "iso_1_code": null, "iso_3_code": "alj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3064", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iraya", "iso_1_code": null, "iso_3_code": "iry", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3065", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tadyawan", "iso_1_code": null, "iso_3_code": "tdy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3066", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3063", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": "atz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3069", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3068", + "scripts": [], + "own_tokenizer": false }, { "name": "Ilocano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ilocano", "iso_1_code": null, "iso_3_code": "ilo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3071", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3070", + "scripts": [], + "own_tokenizer": false }, { "name": "Meso-Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Alta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Alta, Southern", "iso_1_code": null, "iso_3_code": "agy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3074", + "scripts": [], + "own_tokenizer": false }, { "name": "Alta, Northern", "iso_1_code": null, "iso_3_code": "aqn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3075", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3073", + "scripts": [], + "own_tokenizer": false }, { "name": "South-Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Isinai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Isinay", "iso_1_code": null, "iso_3_code": "inn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3078", + "scripts": [], + "own_tokenizer": false }, { "name": "North Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kalinga-Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Itneg, Binongan", "iso_1_code": null, "iso_3_code": "itb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3083", + "scripts": [], + "own_tokenizer": false }, { "name": "Itneg, Inlaud", "iso_1_code": null, "iso_3_code": "iti", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3084", + "scripts": [], + "own_tokenizer": false }, { "name": "Itneg, Maeng", "iso_1_code": null, "iso_3_code": "itt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3085", + "scripts": [], + "own_tokenizer": false }, { "name": "Itneg, Moyadan", "iso_1_code": null, "iso_3_code": "ity", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3086", + "scripts": [], + "own_tokenizer": false }, { "name": "Itneg, Masadiit", "iso_1_code": null, "iso_3_code": "tis", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3087", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3082", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalinga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kalinga, Vanaw", "iso_1_code": null, "iso_3_code": "bjx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3089", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalinga, Mabaka Valley", "iso_1_code": null, "iso_3_code": "kkg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3090", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalinga, Majukayang", "iso_1_code": null, "iso_3_code": "kmd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3091", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kalinga, Limos", "iso_1_code": null, "iso_3_code": "kmk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3092", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kalinga, Tanudan", "iso_1_code": null, "iso_3_code": "kml", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3093", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalinga, Lubuagan", "iso_1_code": null, "iso_3_code": "knb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3094", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalinga, Southern", "iso_1_code": null, "iso_3_code": "ksc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3095", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kalinga, Butbut", "iso_1_code": null, "iso_3_code": "kyb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3096", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3088", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3081", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Balangaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Balangao", "iso_1_code": null, "iso_3_code": "blw", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3099", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3098", + "scripts": [], + "own_tokenizer": false }, { "name": "Bontok-Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bontok", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bontok, Eastern", "iso_1_code": null, "iso_3_code": "ebk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3102", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bontok, Central", "iso_1_code": null, "iso_3_code": "lbk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3103", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bontok, Southern", "iso_1_code": null, "iso_3_code": "obk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3104", + "scripts": [], + "own_tokenizer": false }, { "name": "Bontok, Northern", "iso_1_code": null, "iso_3_code": "rbk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3105", + "scripts": [], + "own_tokenizer": false }, { "name": "Bontok, Southwestern", "iso_1_code": null, "iso_3_code": "vbk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3106", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3101", + "scripts": [], + "own_tokenizer": false }, { "name": "Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kankanaey", "iso_1_code": null, "iso_3_code": "kne", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3108", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kankanay, Northern", "iso_1_code": null, "iso_3_code": "xnn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3109", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3107", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3100", + "scripts": [], + "own_tokenizer": false }, { "name": "Ifugaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ifugao, Amganad", "iso_1_code": null, "iso_3_code": "ifa", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3111", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ifugao, Batad", "iso_1_code": null, "iso_3_code": "ifb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3112", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ifugao, Tuwali", "iso_1_code": null, "iso_3_code": "ifk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3113", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ifugao, Mayoyao", "iso_1_code": null, "iso_3_code": "ifu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3114", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3110", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3097", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3080", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3077", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ilongot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bogkalot", "iso_1_code": null, "iso_3_code": "ilk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3117", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3116", + "scripts": [], + "own_tokenizer": false }, { "name": "West Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pangasinan", "iso_1_code": null, "iso_3_code": "pag", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3119", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nuclear Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ibaloy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ibaloi", "iso_1_code": null, "iso_3_code": "ibl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3122", + "scripts": [], + "own_tokenizer": false }, { "name": "I-wak", "iso_1_code": null, "iso_3_code": "iwk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3123", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3121", + "scripts": [], + "own_tokenizer": false }, { "name": "Kallahan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kallahan, Keley-i", "iso_1_code": null, "iso_3_code": "ify", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3125", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kalanguya", "iso_1_code": null, "iso_3_code": "kak", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3126", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3124", + "scripts": [], + "own_tokenizer": false }, { "name": "Karaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Karao", "iso_1_code": null, "iso_3_code": "kyj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3128", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3127", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3120", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3118", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3115", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3076", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3072", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cagayan Valley", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ibanagic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Atta, Pudtol", "iso_1_code": null, "iso_3_code": "atp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3132", + "scripts": [], + "own_tokenizer": false }, { "name": "Atta, Pamplona", "iso_1_code": null, "iso_3_code": "att", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3133", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Atta, Faire", "iso_1_code": null, "iso_3_code": "azt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3134", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibanag", "iso_1_code": null, "iso_3_code": "ibg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3135", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Itawit", "iso_1_code": null, "iso_3_code": "itv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3136", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yogad", "iso_1_code": null, "iso_3_code": "yog", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3137", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaddangic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agta, Central Cagayan", "iso_1_code": null, "iso_3_code": "agt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3139", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gaddang", "iso_1_code": null, "iso_3_code": "gad", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3140", + "scripts": [], + "own_tokenizer": false }, { "name": "Ga\ua78cdang", "iso_1_code": null, "iso_3_code": "gdg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3141", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3138", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3131", + "scripts": [], + "own_tokenizer": false }, { "name": "Isnag", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Isnag", "iso_1_code": null, "iso_3_code": "isd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3143", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Adasen", "iso_1_code": null, "iso_3_code": "tiu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3144", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3142", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3130", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeastern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agta, Pahanan", "iso_1_code": null, "iso_3_code": "apf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3146", + "scripts": [], + "own_tokenizer": false }, { "name": "Paranan", "iso_1_code": null, "iso_3_code": "prf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3147", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Agta, Casiguran Dumagat", "iso_1_code": null, "iso_3_code": "dgc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3149", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Agta, Dupaninan", "iso_1_code": null, "iso_3_code": "duo", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3150", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Agta, Dicamay", "iso_1_code": null, "iso_3_code": "duy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3151", + "scripts": [], + "own_tokenizer": false }, { "name": "Kasiguranin", "iso_1_code": null, "iso_3_code": "ksn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3152", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3145", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3129", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3067", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest Sumatra-Barrier Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Enggano", "iso_1_code": null, "iso_3_code": "eno", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3154", + "scripts": [], + "own_tokenizer": false }, { "name": "Gayo", "iso_1_code": null, "iso_3_code": "gay", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3155", + "scripts": [], + "own_tokenizer": false }, { "name": "Mentawai", "iso_1_code": null, "iso_3_code": "mwv", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3156", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Simeulue", "iso_1_code": null, "iso_3_code": "smr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3157", + "scripts": [], + "own_tokenizer": false }, { "name": "Batak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Batak Dairi", "iso_1_code": null, "iso_3_code": "btd", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3160", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Batak Karo", "iso_1_code": null, "iso_3_code": "btx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3161", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Batak Alas-Kluet", "iso_1_code": null, "iso_3_code": "btz", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3162", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3159", + "scripts": [], + "own_tokenizer": false }, { "name": "Simalungan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Batak Simalungun", "iso_1_code": null, "iso_3_code": "bts", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3164", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3163", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Batak Angkola", "iso_1_code": null, "iso_3_code": "akb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3166", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Batak Toba", "iso_1_code": null, "iso_3_code": "bbc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3167", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Batak Mandailing", "iso_1_code": null, "iso_3_code": "btm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3168", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3165", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3158", + "scripts": [], + "own_tokenizer": false }, { "name": "Nias", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nias", "iso_1_code": null, "iso_3_code": "nia", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3170", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sikule", "iso_1_code": null, "iso_3_code": "skh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3171", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3153", + "scripts": [], + "own_tokenizer": false }, { "name": "Palauan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Palauan", "iso_1_code": null, "iso_3_code": "pau", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3173", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3172", + "scripts": [], + "own_tokenizer": false }, { "name": "Rejang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rejang", "iso_1_code": null, "iso_3_code": "rej", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3175", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3174", + "scripts": [], + "own_tokenizer": false }, { "name": "Sangiric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sangil", "iso_1_code": null, "iso_3_code": "snl", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3178", + "scripts": [], + "own_tokenizer": false }, { "name": "Sangir", "iso_1_code": null, "iso_3_code": "sxn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3179", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Talaud", "iso_1_code": null, "iso_3_code": "tld", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3180", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3177", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bantik", "iso_1_code": null, "iso_3_code": "bnq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3182", + "scripts": [], + "own_tokenizer": false }, { "name": "Ratahan", "iso_1_code": null, "iso_3_code": "rth", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3183", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3181", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3176", + "scripts": [], + "own_tokenizer": false }, { "name": "South Sulawesi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": "bug", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3186", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Koneq-koneq", "iso_1_code": null, "iso_3_code": "cml", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3187", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Embaloh", "iso_1_code": null, "iso_3_code": "emb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3189", + "scripts": [], + "own_tokenizer": false }, { "name": "Taman", "iso_1_code": null, "iso_3_code": "tmn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3190", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3188", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3185", + "scripts": [], + "own_tokenizer": false }, { "name": "Lemolang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Limola", "iso_1_code": null, "iso_3_code": "ley", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3192", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3191", + "scripts": [], + "own_tokenizer": false }, { "name": "Makassar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bentong", "iso_1_code": null, "iso_3_code": "bnu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3194", + "scripts": [], + "own_tokenizer": false }, { "name": "Konjo, Coastal", "iso_1_code": null, "iso_3_code": "kjc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3195", + "scripts": [], + "own_tokenizer": false }, { "name": "Konjo, Highland", "iso_1_code": null, "iso_3_code": "kjk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3196", + "scripts": [], + "own_tokenizer": false }, { "name": "Makasar", "iso_1_code": null, "iso_3_code": "mak", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3197", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Selayar", "iso_1_code": null, "iso_3_code": "sly", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3193", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": "mqx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3200", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Mandar", "iso_1_code": null, "iso_3_code": "mdr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3203", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3202", + "scripts": [], + "own_tokenizer": false }, { "name": "Masenrempulu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Malimpung", "iso_1_code": null, "iso_3_code": "mli", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3205", + "scripts": [], + "own_tokenizer": false }, { "name": "Duri", "iso_1_code": null, "iso_3_code": "mvp", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3206", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Enrekang", "iso_1_code": null, "iso_3_code": "ptt", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3207", + "scripts": [], + "own_tokenizer": false }, { "name": "Maiwa", "iso_1_code": null, "iso_3_code": "wmm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3208", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3204", + "scripts": [], + "own_tokenizer": false }, { "name": "Pitu Ulunna Salu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aralle-Tabulahan", "iso_1_code": null, "iso_3_code": "atq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3210", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dakka", "iso_1_code": null, "iso_3_code": "dkk", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3211", + "scripts": [], + "own_tokenizer": false }, { "name": "Pannei", "iso_1_code": null, "iso_3_code": "pnc", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3212", + "scripts": [], + "own_tokenizer": false }, { "name": "Bambam", "iso_1_code": null, "iso_3_code": "ptu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3213", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ulumanda\u2019", "iso_1_code": null, "iso_3_code": "ulm", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3209", + "scripts": [], + "own_tokenizer": false }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kalumpang", "iso_1_code": null, "iso_3_code": "kli", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3216", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamasa", "iso_1_code": null, "iso_3_code": "mqj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3217", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tae\u2019", "iso_1_code": null, "iso_3_code": "rob", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3218", + "scripts": [], + "own_tokenizer": false }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": "sda", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3219", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Talondo\u2019", "iso_1_code": null, "iso_3_code": "tln", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3220", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3215", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3199", + "scripts": [], + "own_tokenizer": false }, { "name": "Seko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Seko Tengah", "iso_1_code": null, "iso_3_code": "sko", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3222", + "scripts": [], + "own_tokenizer": false }, { "name": "Seko Padang", "iso_1_code": null, "iso_3_code": "skx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3223", + "scripts": [], + "own_tokenizer": false }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Budong-Budong", "iso_1_code": null, "iso_3_code": "bdx", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3225", + "scripts": [], + "own_tokenizer": false }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": "psn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3226", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3224", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3221", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3184", + "scripts": [], + "own_tokenizer": false }, { "name": "Sundanese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Badui", "iso_1_code": null, "iso_3_code": "bac", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3228", + "scripts": [], + "own_tokenizer": false }, { "name": "Sunda", "iso_1_code": "su", "iso_3_code": "sun", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3229", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3227", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bulungan", "iso_1_code": null, "iso_3_code": "blj", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3231", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorap", "iso_1_code": null, "iso_3_code": "goq", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3232", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1452", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pazeh", "iso_1_code": null, "iso_3_code": "pzh", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3234", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulon", "iso_1_code": null, "iso_3_code": "uon", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3235", + "scripts": [], + "own_tokenizer": false }, { "name": "Saisiyat", "iso_1_code": null, "iso_3_code": "xsy", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3233", + "scripts": [], + "own_tokenizer": false }, { "name": "Paiwan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Paiwan", "iso_1_code": null, "iso_3_code": "pwn", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3238", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3237", + "scripts": [], + "own_tokenizer": false }, { "name": "Puyuma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Puyuma", "iso_1_code": null, "iso_3_code": "pyu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3239", + "scripts": [], + "own_tokenizer": false }, { "name": "Rukai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rukai", "iso_1_code": null, "iso_3_code": "dru", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3242", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3241", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsouic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Saaroa", "iso_1_code": null, "iso_3_code": "sxr", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3244", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsou", "iso_1_code": null, "iso_3_code": "tsu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3245", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanakanabu", "iso_1_code": null, "iso_3_code": "xnb", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3246", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3243", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ketangalan", "iso_1_code": null, "iso_3_code": "kae", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3248", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3247", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Babuza", "iso_1_code": null, "iso_3_code": "bzg", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3251", + "scripts": [], + "own_tokenizer": false }, { "name": "Papora-Hoanya", "iso_1_code": null, "iso_3_code": "ppu", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3250", + "scripts": [], + "own_tokenizer": false }, { "name": "Thao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Thao", "iso_1_code": null, "iso_3_code": "ssf", - "tokenizer": { - "name": "tagalog", - "tokenizer": "SpaCyTokenizer(\"tl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3253", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "1436", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Aymaran.json b/data/Aymaran.json index d1d67739b5d3ef7e44a40188e09e578b01141b86..3f259765772f4bc9dd9a9ba67d875d0755e41437 100644 --- a/data/Aymaran.json +++ b/data/Aymaran.json @@ -2,50 +2,64 @@ "name": "Aymaran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aymara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aymara, Southern", "iso_1_code": "ay", "iso_3_code": "ayc", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3257", + "scripts": [], + "own_tokenizer": false }, { "name": "Aymara, Central", "iso_1_code": "ay", "iso_3_code": "ayr", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3258", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3256", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jaqaru", "iso_1_code": null, "iso_3_code": "jqr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3260", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3259", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3255", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Barbacoan.json b/data/Barbacoan.json index 401257a14f2f636d5dad4bcbc12992aa3acaff57..e783f605709fa10c659a97bbc5ddfeaf6835ce3b 100644 --- a/data/Barbacoan.json +++ b/data/Barbacoan.json @@ -2,50 +2,68 @@ "name": "Barbacoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awa-Cuaiquer", "iso_1_code": null, "iso_3_code": "kwi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3263", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3262", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chachi", "iso_1_code": null, "iso_3_code": "cbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3265", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tsafiki", "iso_1_code": null, "iso_3_code": "cof", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3266", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3264", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3261", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Bayono-Awbono.json b/data/Bayono-Awbono.json index 58784c6abc381f9a3c7654f710527a50402eda73..8da445249315bbd016c7711ff93e82a3c7857c51 100644 --- a/data/Bayono-Awbono.json +++ b/data/Bayono-Awbono.json @@ -2,24 +2,30 @@ "name": "Bayono-Awbono", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awbono", "iso_1_code": null, "iso_3_code": "awh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3268", + "scripts": [], + "own_tokenizer": false }, { "name": "Bayono", "iso_1_code": null, "iso_3_code": "byl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3269", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3267", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Border.json b/data/Border.json index 9d67bc48a6559a150a2f0e8402cd99c95a93ff78..8547621b8b8ff9764058ec3b7c3e843196cc371a 100644 --- a/data/Border.json +++ b/data/Border.json @@ -2,155 +2,197 @@ "name": "Border", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bewani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ainbai", "iso_1_code": null, "iso_3_code": "aic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3272", + "scripts": [], + "own_tokenizer": false }, { "name": "Kilmeri", "iso_1_code": null, "iso_3_code": "kih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3273", + "scripts": [], + "own_tokenizer": false }, { "name": "Ningera", "iso_1_code": null, "iso_3_code": "nby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3274", + "scripts": [], + "own_tokenizer": false }, { "name": "Pagi", "iso_1_code": null, "iso_3_code": "pgi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3275", + "scripts": [], + "own_tokenizer": false }, { "name": "Umeda", "iso_1_code": null, "iso_3_code": "upi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3271", + "scripts": [], + "own_tokenizer": false }, { "name": "Taikat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Taikat", "iso_1_code": null, "iso_3_code": "aos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3278", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyi", "iso_1_code": null, "iso_3_code": "auw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3279", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3277", + "scripts": [], + "own_tokenizer": false }, { "name": "Waris", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amanab", "iso_1_code": null, "iso_3_code": "amn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3281", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Daonda", "iso_1_code": null, "iso_3_code": "dnd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3282", + "scripts": [], + "own_tokenizer": false }, { "name": "Imonda", "iso_1_code": null, "iso_3_code": "imn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3283", + "scripts": [], + "own_tokenizer": false }, { "name": "Manem", "iso_1_code": null, "iso_3_code": "jet", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3284", + "scripts": [], + "own_tokenizer": false }, { "name": "Auwe", "iso_1_code": null, "iso_3_code": "smf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3285", + "scripts": [], + "own_tokenizer": false }, { "name": "Viid", "iso_1_code": null, "iso_3_code": "snu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3286", + "scripts": [], + "own_tokenizer": false }, { "name": "Sowanda", "iso_1_code": null, "iso_3_code": "sow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3287", + "scripts": [], + "own_tokenizer": false }, { "name": "Waris", "iso_1_code": null, "iso_3_code": "wrs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3288", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3280", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3270", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Bororoan.json b/data/Bororoan.json index 430922759dfb6f346fcbf163e69de88f93645ad2..f2d1e7168aead9d439066d8dc684af2160f7d24a 100644 --- a/data/Bororoan.json +++ b/data/Bororoan.json @@ -2,32 +2,42 @@ "name": "Bororoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bor\u00f4ro", "iso_1_code": null, "iso_3_code": "bor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3290", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otuke", "iso_1_code": null, "iso_3_code": "otu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3291", + "scripts": [], + "own_tokenizer": false }, { "name": "Umot\u00edna", "iso_1_code": null, "iso_3_code": "umo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3292", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3289", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Botocudoan.json b/data/Botocudoan.json index 24e6b5f45dba3b77d64fb5e8d5def476b524545f..86bdd6205115b9d94617dfe2121aa73943284dcf 100644 --- a/data/Botocudoan.json +++ b/data/Botocudoan.json @@ -2,16 +2,20 @@ "name": "Botocudoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Krenak", "iso_1_code": null, "iso_3_code": "kqq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3294", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3293", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Caddoan.json b/data/Caddoan.json index 569c8abf6dc27cfe0d4bec8355cadfb4b67110a1..fa6ddb4d47d7d3fc4aad44e8937d31c5a49518a5 100644 --- a/data/Caddoan.json +++ b/data/Caddoan.json @@ -2,75 +2,93 @@ "name": "Caddoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Caddo", "iso_1_code": null, "iso_3_code": "cad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3296", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Caddoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wichita", "iso_1_code": null, "iso_3_code": "wic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3298", + "scripts": [], + "own_tokenizer": false }, { "name": "Kitsai-Proto-Pawnee", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kitsai", "iso_1_code": null, "iso_3_code": "kii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3300", + "scripts": [], + "own_tokenizer": false }, { "name": "Proto-Pawnee", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arikara", "iso_1_code": null, "iso_3_code": "ari", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3302", + "scripts": [], + "own_tokenizer": false }, { "name": "Pawnee", "iso_1_code": null, "iso_3_code": "paw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3303", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3301", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3299", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3297", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3295", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Cahuapanan.json b/data/Cahuapanan.json index 121c410ba8c4b8ec5919f6ee2f1af4fb576544c4..8c4d970d29e042d809c0227f83349fa48ce61ec3 100644 --- a/data/Cahuapanan.json +++ b/data/Cahuapanan.json @@ -2,24 +2,32 @@ "name": "Cahuapanan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Shawi", "iso_1_code": null, "iso_3_code": "cbt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3305", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jebero", "iso_1_code": null, "iso_3_code": "jeb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3304", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Cariban.json b/data/Cariban.json index 040355c05cdaf1c944421bd1fe27d4f05a33edb6..d9a8df9c09bd165d186c6ccc01f41675443ccb62 100644 --- a/data/Cariban.json +++ b/data/Cariban.json @@ -2,445 +2,569 @@ "name": "Cariban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Carib", "iso_1_code": null, "iso_3_code": "car", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3308", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apala\u00ed", "iso_1_code": null, "iso_3_code": "apy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3310", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cuman\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chaima", "iso_1_code": null, "iso_3_code": "ciy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3312", + "scripts": [], + "own_tokenizer": false }, { "name": "Cumanagoto", "iso_1_code": null, "iso_3_code": "cuo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3313", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3311", + "scripts": [], + "own_tokenizer": false }, { "name": "Makiritare", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maquiritari", "iso_1_code": null, "iso_3_code": "mch", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3315", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3314", + "scripts": [], + "own_tokenizer": false }, { "name": "Mapoyo-Yavarana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mapoyo", "iso_1_code": null, "iso_3_code": "mcg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3317", + "scripts": [], + "own_tokenizer": false }, { "name": "P\u00e9mono", "iso_1_code": null, "iso_3_code": "pev", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3318", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamanaku", "iso_1_code": null, "iso_3_code": "tmz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3319", + "scripts": [], + "own_tokenizer": false }, { "name": "Yabarana", "iso_1_code": null, "iso_3_code": "yar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3320", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3316", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaxui\u00e2na", "iso_1_code": null, "iso_3_code": "kbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3322", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayana", "iso_1_code": null, "iso_3_code": "way", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3323", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3321", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3309", + "scripts": [], + "own_tokenizer": false }, { "name": "Kashuyana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sikiana", "iso_1_code": null, "iso_3_code": "sik", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3325", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3324", + "scripts": [], + "own_tokenizer": false }, { "name": "North Amazonian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pem\u00f3n", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pem\u00f3n proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pemon", "iso_1_code": null, "iso_3_code": "aoc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3329", + "scripts": [], + "own_tokenizer": false }, { "name": "Macushi", "iso_1_code": null, "iso_3_code": "mbc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3330", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kapong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Akawaio", "iso_1_code": null, "iso_3_code": "ake", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3332", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Patamona", "iso_1_code": null, "iso_3_code": "pbc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3333", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3331", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3328", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3327", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawaper\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Waimiri-Atroar\u00ed", "iso_1_code": null, "iso_3_code": "atr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3335", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3334", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3326", + "scripts": [], + "own_tokenizer": false }, { "name": "South Amazonian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "E\u2019\u00f1apa Woromaipu", "iso_1_code": null, "iso_3_code": "pbh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3337", + "scripts": [], + "own_tokenizer": false }, { "name": "Arara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arara, Par\u00e1", "iso_1_code": null, "iso_3_code": "aap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3339", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikpeng", "iso_1_code": null, "iso_3_code": "txi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3340", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3338", + "scripts": [], + "own_tokenizer": false }, { "name": "Bakair\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bakair\u00ed", "iso_1_code": null, "iso_3_code": "bkq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3342", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Amonap", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuik\u00faro-Kalap\u00e1lo", "iso_1_code": null, "iso_3_code": "kui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3344", + "scripts": [], + "own_tokenizer": false }, { "name": "Matipuhy", "iso_1_code": null, "iso_3_code": "mzo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3345", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3343", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3341", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3336", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiriy\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Salum\u00e1", "iso_1_code": null, "iso_3_code": "slj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3347", + "scripts": [], + "own_tokenizer": false }, { "name": "Karihona", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Carijona", "iso_1_code": null, "iso_3_code": "cbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3349", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3348", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiriy\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Akurio", "iso_1_code": null, "iso_3_code": "ako", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3351", + "scripts": [], + "own_tokenizer": false }, { "name": "Tri\u00f3", "iso_1_code": null, "iso_3_code": "tri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3352", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3350", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3346", + "scripts": [], + "own_tokenizer": false }, { "name": "Waiwai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hixkary\u00e1na", "iso_1_code": null, "iso_3_code": "hix", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3354", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Waiwai", "iso_1_code": null, "iso_3_code": "waw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3355", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3353", + "scripts": [], + "own_tokenizer": false }, { "name": "Yukpa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yucpa-Yapreria", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Japreria", "iso_1_code": null, "iso_3_code": "jru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3358", + "scripts": [], + "own_tokenizer": false }, { "name": "Yukpa", "iso_1_code": null, "iso_3_code": "yup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3359", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3357", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3356", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3307", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Central Solomons.json b/data/Central Solomons.json index b6bcd3aa2dd87009f093ca17e0f6187d01121a82..94b78fb745810a113bf2c56e2d0efea2ede8381e 100644 --- a/data/Central Solomons.json +++ b/data/Central Solomons.json @@ -2,40 +2,50 @@ "name": "Central Solomons", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bilua", "iso_1_code": null, "iso_3_code": "blb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3361", + "scripts": [], + "own_tokenizer": false }, { "name": "Lavukaleve", "iso_1_code": null, "iso_3_code": "lvk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3362", + "scripts": [], + "own_tokenizer": false }, { "name": "Savosavo", "iso_1_code": null, "iso_3_code": "svs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3363", + "scripts": [], + "own_tokenizer": false }, { "name": "Touo", "iso_1_code": null, "iso_3_code": "tqu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3360", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chapacuran.json b/data/Chapacuran.json index 7d6d0322737180d3673bfec17a2b1a4dbe80fa2d..df9b38a2b14dc441a4c27f7eabdae1b2531421ab 100644 --- a/data/Chapacuran.json +++ b/data/Chapacuran.json @@ -2,58 +2,72 @@ "name": "Chapacuran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Itene", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Itene", "iso_1_code": null, "iso_3_code": "ite", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3367", + "scripts": [], + "own_tokenizer": false }, { "name": "Tor\u00e1", "iso_1_code": null, "iso_3_code": "trz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3368", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3366", + "scripts": [], + "own_tokenizer": false }, { "name": "Wari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Oro Win", "iso_1_code": null, "iso_3_code": "orw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3370", + "scripts": [], + "own_tokenizer": false }, { "name": "Paka\u00e1snovos", "iso_1_code": null, "iso_3_code": "pav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3371", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3369", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3365", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chibchan.json b/data/Chibchan.json index e31ec365f398204e109f7d41a99cee8562ae1633..0b1c2e6029c6f28e03d6284286fa87b3dace0b3e 100644 --- a/data/Chibchan.json +++ b/data/Chibchan.json @@ -2,302 +2,392 @@ "name": "Chibchan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chibchan A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Boruca", "iso_1_code": null, "iso_3_code": "brn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3374", + "scripts": [], + "own_tokenizer": false }, { "name": "Teribe", "iso_1_code": null, "iso_3_code": "tfr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3375", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guaymi\u00edc", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ng\u00e4bere", "iso_1_code": null, "iso_3_code": "gym", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3377", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Buglere", "iso_1_code": null, "iso_3_code": "sab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3378", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3376", + "scripts": [], + "own_tokenizer": false }, { "name": "Viceitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bribri", "iso_1_code": null, "iso_3_code": "bzd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3380", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cab\u00e9car", "iso_1_code": null, "iso_3_code": "cjp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3381", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3379", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3373", + "scripts": [], + "own_tokenizer": false }, { "name": "Chibchan B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pech", "iso_1_code": null, "iso_3_code": "pay", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3383", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Chibchan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chimila", "iso_1_code": null, "iso_3_code": "cbg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3387", + "scripts": [], + "own_tokenizer": false }, { "name": "Arhuacan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kogi", "iso_1_code": null, "iso_3_code": "kog", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3389", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Southern and Eastern Arhuacan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arhuaco", "iso_1_code": null, "iso_3_code": "arh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3391", + "scripts": [], + "own_tokenizer": false }, { "name": "Guamaca-Atanque", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sanka", "iso_1_code": null, "iso_3_code": "mbp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3392", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3390", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3386", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bar\u00ed", "iso_1_code": null, "iso_3_code": "mot", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3395", + "scripts": [], + "own_tokenizer": false }, { "name": "Cundicocuyese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chibcha", "iso_1_code": null, "iso_3_code": "chb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3397", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunebo, Barro Negro", "iso_1_code": null, "iso_3_code": "tbn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3398", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunebo, Western", "iso_1_code": null, "iso_3_code": "tnb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3399", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunebo, Angosturas", "iso_1_code": null, "iso_3_code": "tnd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3400", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunebo, Central", "iso_1_code": null, "iso_3_code": "tuf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3401", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3394", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3385", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuna, San Blas", "iso_1_code": null, "iso_3_code": "cuk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3403", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kuna, Border", "iso_1_code": null, "iso_3_code": "kvn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3404", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3402", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3384", + "scripts": [], + "own_tokenizer": false }, { "name": "Votic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mal\u00e9ku Ja\u00edka", "iso_1_code": null, "iso_3_code": "gut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3406", + "scripts": [], + "own_tokenizer": false }, { "name": "Rama", "iso_1_code": null, "iso_3_code": "rma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3407", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3405", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3382", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3372", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chimakuan.json b/data/Chimakuan.json index 08548a9b115fa15ee7fd510e31d2044cbbe7988b..be5167ccff251c9324d9756490815ffde8bc711c 100644 --- a/data/Chimakuan.json +++ b/data/Chimakuan.json @@ -2,24 +2,30 @@ "name": "Chimakuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quileute", "iso_1_code": null, "iso_3_code": "qui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3409", + "scripts": [], + "own_tokenizer": false }, { "name": "Chemakum", "iso_1_code": null, "iso_3_code": "xch", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3410", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3408", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chinookan.json b/data/Chinookan.json index 209c07e86d7c3e07830b6571c3d92a842b32dcf9..45bf668a32e57c7f9535f7eda9a4273178078855 100644 --- a/data/Chinookan.json +++ b/data/Chinookan.json @@ -2,33 +2,41 @@ "name": "Chinookan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinook", "iso_1_code": null, "iso_3_code": "chh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3412", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Chinookan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wasco-Wishram", "iso_1_code": null, "iso_3_code": "wac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3414", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3411", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chipaya-Uru.json b/data/Chipaya-Uru.json index 32988a7076d99abce2f4fb94766aa549cde599f9..432e3f49fbbdd6a089ea5afe30cd0e2e08eefb97 100644 --- a/data/Chipaya-Uru.json +++ b/data/Chipaya-Uru.json @@ -2,24 +2,32 @@ "name": "Chipaya-Uru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chipaya", "iso_1_code": null, "iso_3_code": "cap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3416", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Uru", "iso_1_code": null, "iso_3_code": "ure", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3417", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3415", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chocoan.json b/data/Chocoan.json index a0141c1a1e31135b4706d93ce31be22b678f4ec8..6546ac5ea5e79d67bce295c68c34530deb363711 100644 --- a/data/Chocoan.json +++ b/data/Chocoan.json @@ -2,91 +2,121 @@ "name": "Chocoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Woun Meu", "iso_1_code": null, "iso_3_code": "noa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3419", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Embera Cat\u00edo", "iso_1_code": null, "iso_3_code": "cto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3422", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ember\u00e1, Northern", "iso_1_code": null, "iso_3_code": "emp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3423", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3421", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Embera Baud\u00f3", "iso_1_code": null, "iso_3_code": "bdc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3425", + "scripts": [], + "own_tokenizer": false }, { "name": "Embera Cham\u00ed", "iso_1_code": null, "iso_3_code": "cmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3426", + "scripts": [], + "own_tokenizer": false }, { "name": "Epena", "iso_1_code": null, "iso_3_code": "sja", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3427", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Embera Tad\u00f3", "iso_1_code": null, "iso_3_code": "tdc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3428", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3424", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3420", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3418", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Cholonan.json b/data/Cholonan.json index e246b3e81838a934b78d667dfe4c4947135ffda0..0fbd7f4e703c1e7a1bc94944eaef5414c2247e46 100644 --- a/data/Cholonan.json +++ b/data/Cholonan.json @@ -2,24 +2,30 @@ "name": "Cholonan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chol\u00f3n", "iso_1_code": null, "iso_3_code": "cht", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3430", + "scripts": [], + "own_tokenizer": false }, { "name": "Hibito", "iso_1_code": null, "iso_3_code": "hib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3431", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3429", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chon.json b/data/Chon.json index dc6c4b0c29a47896f91c4673142cabb420ca26e3..7b39b876e4ed8031ea35a46571439304ed356e8a 100644 --- a/data/Chon.json +++ b/data/Chon.json @@ -2,33 +2,41 @@ "name": "Chon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tehuelche", "iso_1_code": null, "iso_3_code": "teh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3433", + "scripts": [], + "own_tokenizer": false }, { "name": "Island Chon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ona", "iso_1_code": null, "iso_3_code": "ona", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3434", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3432", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chukotko-Kamchatkan.json b/data/Chukotko-Kamchatkan.json index c4391d5410d71b8c7eb345567799cd95e7d51a03..b2d556d33fd441bc578e2fcbe45e77553dcc209e 100644 --- a/data/Chukotko-Kamchatkan.json +++ b/data/Chukotko-Kamchatkan.json @@ -2,84 +2,108 @@ "name": "Chukotko-Kamchatkan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chukot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chukchi", "iso_1_code": null, "iso_3_code": "ckt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3439", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3438", + "scripts": [], + "own_tokenizer": false }, { "name": "Koryak-Alyutor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alutor", "iso_1_code": null, "iso_3_code": "alr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3441", + "scripts": [], + "own_tokenizer": false }, { "name": "Koryak", "iso_1_code": null, "iso_3_code": "kpy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3442", + "scripts": [], + "own_tokenizer": false }, { "name": "Kerek", "iso_1_code": null, "iso_3_code": "krk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3443", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3437", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Itelmen", "iso_1_code": null, "iso_3_code": "itl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3445", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3444", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3436", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Chumashan.json b/data/Chumashan.json index 5fd1d0861d97972e0e52dfc7781c638d5731837b..8321f978c2fddf4c26f6e85ae0e75dcda7c09e90 100644 --- a/data/Chumashan.json +++ b/data/Chumashan.json @@ -2,74 +2,92 @@ "name": "Chumashan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Obispe\u00f1o", "iso_1_code": null, "iso_3_code": "obi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3447", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Chumash", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barbare\u00f1o", "iso_1_code": null, "iso_3_code": "boi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3449", + "scripts": [], + "own_tokenizer": false }, { "name": "Inese\u00f1o", "iso_1_code": null, "iso_3_code": "inz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3450", + "scripts": [], + "own_tokenizer": false }, { "name": "Purisime\u00f1o", "iso_1_code": null, "iso_3_code": "puy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3451", + "scripts": [], + "own_tokenizer": false }, { "name": "Venture\u00f1o", "iso_1_code": null, "iso_3_code": "veo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3452", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3448", + "scripts": [], + "own_tokenizer": false }, { "name": "Island Chumash", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cruze\u00f1o", "iso_1_code": null, "iso_3_code": "crz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3454", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3453", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3446", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/Cochim\303\255-Yuman.json" "b/data/Cochim\303\255-Yuman.json" index 314f8095ccc38487e4737ca9b577c72805f6015d..a3950f7f3003bffa07c77a2fd970bbe20870e871 100644 --- "a/data/Cochim\303\255-Yuman.json" +++ "b/data/Cochim\303\255-Yuman.json" @@ -2,125 +2,155 @@ "name": "Cochim\u00ed-Yuman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yuman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cochimi", "iso_1_code": null, "iso_3_code": "coj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3457", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiliwa", "iso_1_code": null, "iso_3_code": "klb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3458", + "scripts": [], + "own_tokenizer": false }, { "name": "Delta-California", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cocopa", "iso_1_code": null, "iso_3_code": "coc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3460", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumiai", "iso_1_code": null, "iso_3_code": "dih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3461", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3459", + "scripts": [], + "own_tokenizer": false }, { "name": "Pai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Paipai", "iso_1_code": null, "iso_3_code": "ppi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3463", + "scripts": [], + "own_tokenizer": false }, { "name": "Havasupai-Walapai-Yavapai", "iso_1_code": null, "iso_3_code": "yuf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3464", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3462", + "scripts": [], + "own_tokenizer": false }, { "name": "River", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mojave", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mohave", "iso_1_code": null, "iso_3_code": "mov", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3467", + "scripts": [], + "own_tokenizer": false }, { "name": "Maricopa", "iso_1_code": null, "iso_3_code": "mrc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3468", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechan", "iso_1_code": null, "iso_3_code": "yum", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3469", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3466", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3465", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3456", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3455", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Comecrudan.json b/data/Comecrudan.json index 6db09f9389fba038d4c97ebe47cdeda3ac691647..0a8d8f8bda6213548067b47c56dcadb6f6639453 100644 --- a/data/Comecrudan.json +++ b/data/Comecrudan.json @@ -2,48 +2,60 @@ "name": "Comecrudan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mamulique", "iso_1_code": null, "iso_3_code": "emm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3471", + "scripts": [], + "own_tokenizer": false }, { "name": "Comecrudo", "iso_1_code": null, "iso_3_code": "xcm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3472", + "scripts": [], + "own_tokenizer": false }, { "name": "Cotoname", "iso_1_code": null, "iso_3_code": "xcn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3473", + "scripts": [], + "own_tokenizer": false }, { "name": "Coahuilteco", "iso_1_code": null, "iso_3_code": "xcw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3474", + "scripts": [], + "own_tokenizer": false }, { "name": "Garza", "iso_1_code": null, "iso_3_code": "xgr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3475", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3470", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Constructed language.json b/data/Constructed language.json index 87c825a14f2265e2c5117902e080acf92a1d2b61..15bc6802ea845803952125e0de154b3b01931d9e 100644 --- a/data/Constructed language.json +++ b/data/Constructed language.json @@ -2,16 +2,22 @@ "name": "Constructed language", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Esperanto", "iso_1_code": "eo", "iso_3_code": "epo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3477", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3476", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Coosan.json b/data/Coosan.json index 5f70a2fa1b536a6f9dc4a5b608a8002c76be6437..65af07a7ad0b5d8a43616896ee43234bcdfff00d 100644 --- a/data/Coosan.json +++ b/data/Coosan.json @@ -2,24 +2,30 @@ "name": "Coosan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Coos", "iso_1_code": null, "iso_3_code": "csz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3479", + "scripts": [], + "own_tokenizer": false }, { "name": "Miluk", "iso_1_code": null, "iso_3_code": "iml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3480", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3478", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Creole.json b/data/Creole.json index 9c172956c937d2bccddafda5909d342a2cc1bd4d..63f871d2a525cf2d78ba2a4cfc5a78fc8729d6d3 100644 --- a/data/Creole.json +++ b/data/Creole.json @@ -2,1346 +2,2822 @@ "name": "Creole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Afrikaans based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Flaaitaal", "iso_1_code": null, "iso_3_code": "fly", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3483", + "scripts": [], + "own_tokenizer": false }, { "name": "Oorlams", "iso_1_code": null, "iso_3_code": "oor", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3484", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3482", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabic based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Nubi", "iso_1_code": null, "iso_3_code": "kcn", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3486", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabic, Juba", "iso_1_code": "ar", "iso_3_code": "pga", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ar\")", + "original_lang_name": "arabic", + "original_lang_code": "ara", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3487", + "scripts": [], + "own_tokenizer": true } - ] + ], + "node_i": "3485", + "scripts": [], + "own_tokenizer": false }, { "name": "Assamese based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nagamese", "iso_1_code": null, "iso_3_code": "nag", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3489", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3488", + "scripts": [], + "own_tokenizer": false }, { "name": "Dutch based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Berbice Dutch Creole", "iso_1_code": null, "iso_3_code": "brc", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3491", + "scripts": [], + "own_tokenizer": false }, { "name": "Negerhollands", "iso_1_code": null, "iso_3_code": "dcr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3492", + "scripts": [], + "own_tokenizer": false }, { "name": "Javindo", "iso_1_code": null, "iso_3_code": "jvd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3493", + "scripts": [], + "own_tokenizer": false }, { "name": "Petjo", "iso_1_code": null, "iso_3_code": "pey", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3494", + "scripts": [], + "own_tokenizer": false }, { "name": "Skepi Dutch Creole", "iso_1_code": null, "iso_3_code": "skw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3495", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3490", + "scripts": [], + "own_tokenizer": false }, { "name": "English based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Saramaccan", "iso_1_code": null, "iso_3_code": "srm", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3497", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Turks and Caicos English Creole", "iso_1_code": null, "iso_3_code": "tch", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3500", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Afro-Seminole Creole", "iso_1_code": null, "iso_3_code": "afs", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3502", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahamas English Creole", "iso_1_code": null, "iso_3_code": "bah", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3503", + "scripts": [], + "own_tokenizer": false }, { "name": "Sea Island English Creole", "iso_1_code": null, "iso_3_code": "gul", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3504", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3501", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Leeward Caribbean English Creole", "iso_1_code": null, "iso_3_code": "aig", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3506", + "scripts": [], + "own_tokenizer": false }, { "name": "Bajan", "iso_1_code": null, "iso_3_code": "bjs", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3507", + "scripts": [], + "own_tokenizer": false }, { "name": "Grenadian English Creole", "iso_1_code": null, "iso_3_code": "gcl", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3508", + "scripts": [], + "own_tokenizer": false }, { "name": "Guyanese English Creole", "iso_1_code": null, "iso_3_code": "gyn", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3509", + "scripts": [], + "own_tokenizer": false }, { "name": "Vincentian English Creole", "iso_1_code": null, "iso_3_code": "svc", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3510", + "scripts": [], + "own_tokenizer": false }, { "name": "Tobagonian English Creole", "iso_1_code": null, "iso_3_code": "tgh", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3511", + "scripts": [], + "own_tokenizer": false }, { "name": "Trinidadian English Creole", "iso_1_code": null, "iso_3_code": "trf", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3512", + "scripts": [], + "own_tokenizer": false }, { "name": "Virgin Islands English Creole", "iso_1_code": null, "iso_3_code": "vic", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3513", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3505", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3499", + "scripts": [], + "own_tokenizer": false }, { "name": "Krio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Equatorial Guinean Pidgin", "iso_1_code": null, "iso_3_code": "fpe", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3515", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghanaian Pidgin English", "iso_1_code": null, "iso_3_code": "gpe", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3516", + "scripts": [], + "own_tokenizer": false }, { "name": "Krio", "iso_1_code": null, "iso_3_code": "kri", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3517", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pidgin, Nigerian", "iso_1_code": null, "iso_3_code": "pcm", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3518", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Pidgin, Cameroon", "iso_1_code": null, "iso_3_code": "wes", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3519", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3514", + "scripts": [], + "own_tokenizer": false }, { "name": "Suriname", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sranan Tongo", "iso_1_code": null, "iso_3_code": "srn", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3521", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ndyuka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aukan", "iso_1_code": null, "iso_3_code": "djk", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3523", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwinti", "iso_1_code": null, "iso_3_code": "kww", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3524", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3522", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3520", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Belize English Creole", "iso_1_code": null, "iso_3_code": "bzj", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3526", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nicaragua English Creole", "iso_1_code": null, "iso_3_code": "bzk", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3527", + "scripts": [], + "own_tokenizer": false }, { "name": "Islander English Creole", "iso_1_code": null, "iso_3_code": "icr", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3528", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jamaican English Creole", "iso_1_code": null, "iso_3_code": "jam", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3529", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3525", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3498", + "scripts": [], + "own_tokenizer": false }, { "name": "Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bislama", "iso_1_code": "bi", "iso_3_code": "bis", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3531", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hawaii Pidgin", "iso_1_code": null, "iso_3_code": "hwc", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3532", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngatik Men\u2019s Creole", "iso_1_code": null, "iso_3_code": "ngm", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3533", + "scripts": [], + "own_tokenizer": false }, { "name": "Pitcairn-Norfolk", "iso_1_code": null, "iso_3_code": "pih", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3534", + "scripts": [], + "own_tokenizer": false }, { "name": "Pijin", "iso_1_code": null, "iso_3_code": "pis", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3535", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kriol", "iso_1_code": null, "iso_3_code": "rop", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3536", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Torres Strait Creole", "iso_1_code": null, "iso_3_code": "tcs", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3537", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tok Pisin", "iso_1_code": null, "iso_3_code": "tpi", - "tokenizer": { - "name": "nigerian_pidgin", - "tokenizer": "StanzaTokenizer(\"pcm\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3538", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3530", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3496", + "scripts": [], + "own_tokenizer": false }, { "name": "French based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lesser Antillean French Creole", "iso_1_code": null, "iso_3_code": "acf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3540", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tayo", "iso_1_code": null, "iso_3_code": "cks", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3541", + "scripts": [], + "own_tokenizer": false }, { "name": "Seychelles French Creole", "iso_1_code": null, "iso_3_code": "crs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3542", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guadeloupean French Creole", "iso_1_code": null, "iso_3_code": "gcf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3543", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guianese French Creole", "iso_1_code": null, "iso_3_code": "gcr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3544", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Haitian Creole", "iso_1_code": "ht", "iso_3_code": "hat", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3545", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Karipuna French Creole", "iso_1_code": null, "iso_3_code": "kmv", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3546", + "scripts": [], + "own_tokenizer": false }, { "name": "Louisiana Creole", "iso_1_code": null, "iso_3_code": "lou", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3547", + "scripts": [], + "own_tokenizer": false }, { "name": "Morisyen", "iso_1_code": null, "iso_3_code": "mfe", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3548", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "R\u00e9union French Creole", "iso_1_code": null, "iso_3_code": "rcf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3549", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "San Miguel French Creole", "iso_1_code": null, "iso_3_code": "scf", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3550", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3539", + "scripts": [], + "own_tokenizer": false }, { "name": "German based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Unserdeutsch", "iso_1_code": null, "iso_3_code": "uln", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3552", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3551", + "scripts": [], + "own_tokenizer": false }, { "name": "Hindi based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Andaman Hindi Creole", "iso_1_code": null, "iso_3_code": "hca", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3554", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3553", + "scripts": [], + "own_tokenizer": false }, { "name": "Iberian based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Papiamentu", "iso_1_code": null, "iso_3_code": "pap", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3556", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3555", + "scripts": [], + "own_tokenizer": false }, { "name": "Japanese-based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Yilan Creole", "iso_1_code": null, "iso_3_code": "ycr", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3558", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3557", + "scripts": [], + "own_tokenizer": false }, { "name": "Kongo based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kituba", "iso_1_code": null, "iso_3_code": "ktu", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3560", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kituba", "iso_1_code": null, "iso_3_code": "mkw", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3559", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Malay, Ambonese", "iso_1_code": null, "iso_3_code": "abs", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3563", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Betawi", "iso_1_code": null, "iso_3_code": "bew", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3564", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malay, Banda", "iso_1_code": null, "iso_3_code": "bpq", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3565", + "scripts": [], + "own_tokenizer": false }, { "name": "Malaccan Malay Creole", "iso_1_code": null, "iso_3_code": "ccm", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3566", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay, Cocos Islands", "iso_1_code": "ms", "iso_3_code": "coa", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3567", + "scripts": [], + "own_tokenizer": true }, { "name": "Malay, Larantuka", "iso_1_code": null, "iso_3_code": "lrt", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3568", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay, North Moluccan", "iso_1_code": "ms", "iso_3_code": "max", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3569", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Malay, Baba", "iso_1_code": null, "iso_3_code": "mbf", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3570", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malay, Balinese", "iso_1_code": null, "iso_3_code": "mhp", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3571", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay, Kupang", "iso_1_code": null, "iso_3_code": "mkn", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3572", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Indonesian, Peranakan", "iso_1_code": null, "iso_3_code": "pea", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3573", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay, Papuan", "iso_1_code": null, "iso_3_code": "pmy", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3574", + "scripts": [], + "own_tokenizer": false }, { "name": "Sri Lankan Malay Creole", "iso_1_code": null, "iso_3_code": "sci", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3575", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay, Manado", "iso_1_code": "ms", "iso_3_code": "xmm", - "tokenizer": { - "name": "malay", - "tokenizer": "SpaCyTokenizer(\"ms\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3576", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3562", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbandi based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Sango", "iso_1_code": "sg", "iso_3_code": "sag", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3578", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sango, Riverain", "iso_1_code": null, "iso_3_code": "snj", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3579", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3577", + "scripts": [], + "own_tokenizer": false }, { "name": "Portuguese based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Angolar", "iso_1_code": null, "iso_3_code": "aoa", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3581", + "scripts": [], + "own_tokenizer": false }, { "name": "Cafundo Creole", "iso_1_code": null, "iso_3_code": "ccd", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3582", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e3otomense", "iso_1_code": null, "iso_3_code": "cri", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3583", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fa d\u2019Ambu", "iso_1_code": null, "iso_3_code": "fab", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3584", + "scripts": [], + "own_tokenizer": false }, { "name": "Indo-Portuguese", "iso_1_code": null, "iso_3_code": "idb", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3585", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabuverdianu", "iso_1_code": null, "iso_3_code": "kea", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3586", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Malaccan Portuguese Creole", "iso_1_code": null, "iso_3_code": "mcm", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3587", + "scripts": [], + "own_tokenizer": false }, { "name": "Macanese", "iso_1_code": null, "iso_3_code": "mzs", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3588", + "scripts": [], + "own_tokenizer": false }, { "name": "Guinea-Bissau Creole", "iso_1_code": null, "iso_3_code": "pov", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3589", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Principense", "iso_1_code": null, "iso_3_code": "pre", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3590", + "scripts": [], + "own_tokenizer": false }, { "name": "Ternate\u00f1o", "iso_1_code": null, "iso_3_code": "tmg", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3591", + "scripts": [], + "own_tokenizer": false }, { "name": "Pidgin, Timor", "iso_1_code": null, "iso_3_code": "tvy", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3592", + "scripts": [], + "own_tokenizer": false }, { "name": "Korlai Portuguese Creole", "iso_1_code": null, "iso_3_code": "vkp", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3593", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3580", + "scripts": [], + "own_tokenizer": false }, { "name": "Spanish based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chavacano", "iso_1_code": null, "iso_3_code": "cbk", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3595", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Palenquero", "iso_1_code": null, "iso_3_code": "pln", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3594", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Cutchi-Swahili", "iso_1_code": null, "iso_3_code": "ccl", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3598", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3597", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetun based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Thai": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tetun Dili", "iso_1_code": null, "iso_3_code": "tdt", - "tokenizer": { - "name": "arabic", - "tokenizer": "SpaCyTokenizer(\"ar\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", + "scripts": [ + "Arab", + "Thai", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3600", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3599", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3481", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Dravidian.json b/data/Dravidian.json index 70e2c13a0e1aff13da8eefe9ec06cb452368b27f..63ef76d33c2c414637ebda7dc6f70bde507d62b1 100644 --- a/data/Dravidian.json +++ b/data/Dravidian.json @@ -2,1270 +2,2877 @@ "name": "Dravidian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kolami-Naiki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kolami, Northwestern", "iso_1_code": null, "iso_3_code": "kfb", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3604", + "scripts": [], + "own_tokenizer": false }, { "name": "Kolami, Southeastern", "iso_1_code": null, "iso_3_code": "nit", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3605", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3603", + "scripts": [], + "own_tokenizer": false }, { "name": "Parji-Gadaba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gadaba, Mudhili", "iso_1_code": null, "iso_3_code": "gau", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3607", + "scripts": [], + "own_tokenizer": false }, { "name": "Gadaba, Pottangi Ollar", "iso_1_code": null, "iso_3_code": "gdb", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3608", + "scripts": [], + "own_tokenizer": false }, { "name": "Duruwa", "iso_1_code": null, "iso_3_code": "pci", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3609", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3606", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3602", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Brahui", "iso_1_code": null, "iso_3_code": "brh", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3611", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Kumarbhag Paharia", "iso_1_code": null, "iso_3_code": "kmj", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3612", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurux", "iso_1_code": null, "iso_3_code": "kru", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3613", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Sauria Paharia", "iso_1_code": null, "iso_3_code": "mjt", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3614", + "scripts": [], + "own_tokenizer": false }, { "name": "Kisan", "iso_1_code": null, "iso_3_code": "xis", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3615", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3610", + "scripts": [], + "own_tokenizer": false }, { "name": "South-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Gondi-Kui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gondi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Maria, Dandami", "iso_1_code": null, "iso_3_code": "daq", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3619", + "scripts": [], + "own_tokenizer": false }, { "name": "Muria, Eastern", "iso_1_code": null, "iso_3_code": "emu", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3620", + "scripts": [], + "own_tokenizer": false }, { "name": "Gondi, Aheri", "iso_1_code": null, "iso_3_code": "esg", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3621", + "scripts": [], + "own_tokenizer": false }, { "name": "Muria, Far Western", "iso_1_code": null, "iso_3_code": "fmu", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3622", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Gondi, Northern", "iso_1_code": null, "iso_3_code": "gno", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3623", + "scripts": [], + "own_tokenizer": false }, { "name": "Khirwar", "iso_1_code": null, "iso_3_code": "kwx", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3624", + "scripts": [], + "own_tokenizer": false }, { "name": "Maria", "iso_1_code": null, "iso_3_code": "mrr", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3625", + "scripts": [], + "own_tokenizer": false }, { "name": "Muria, Western", "iso_1_code": null, "iso_3_code": "mut", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3626", + "scripts": [], + "own_tokenizer": false }, { "name": "Nagarchal", "iso_1_code": null, "iso_3_code": "nbg", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3627", + "scripts": [], + "own_tokenizer": false }, { "name": "Pardhan", "iso_1_code": null, "iso_3_code": "pch", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3628", + "scripts": [], + "own_tokenizer": false }, { "name": "Gondi, Adilabad", "iso_1_code": null, "iso_3_code": "wsg", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3629", + "scripts": [ + "Telu" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3618", + "scripts": [], + "own_tokenizer": false }, { "name": "Konda-Kui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konda-Dora", "iso_1_code": null, "iso_3_code": "kfc", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3632", + "scripts": [], + "own_tokenizer": false }, { "name": "Mukha-Dora", "iso_1_code": null, "iso_3_code": "mmk", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3633", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3631", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda-Kui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kui-Kuvi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kui, Dawik", "iso_1_code": null, "iso_3_code": "dwk", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3636", + "scripts": [], + "own_tokenizer": false }, { "name": "Koya", "iso_1_code": null, "iso_3_code": "kff", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3637", + "scripts": [ + "Telu" + ], + "own_tokenizer": false }, { "name": "Kuvi", "iso_1_code": null, "iso_3_code": "kxv", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3638", + "scripts": [], + "own_tokenizer": false }, { "name": "Kui", "iso_1_code": null, "iso_3_code": "uki", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3639", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3635", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda-Pengo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manda", "iso_1_code": null, "iso_3_code": "mha", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3641", + "scripts": [], + "own_tokenizer": false }, { "name": "Pengo", "iso_1_code": null, "iso_3_code": "peg", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3642", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3634", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3630", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3617", + "scripts": [], + "own_tokenizer": false }, { "name": "Telugu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Chenchu", "iso_1_code": null, "iso_3_code": "cde", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3644", + "scripts": [], + "own_tokenizer": false }, { "name": "Manna-Dora", "iso_1_code": null, "iso_3_code": "mju", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3645", + "scripts": [], + "own_tokenizer": false }, { "name": "Telugu", "iso_1_code": "te", "iso_3_code": "tel", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3646", + "scripts": [ + "Telu", + "Latn" + ], + "own_tokenizer": true }, { "name": "Waddar", "iso_1_code": null, "iso_3_code": "wbq", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3647", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3643", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3616", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kurichiya", "iso_1_code": null, "iso_3_code": "kfh", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3649", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumba, Attapady", "iso_1_code": null, "iso_3_code": "pkr", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3650", + "scripts": [], + "own_tokenizer": false }, { "name": "Pathiya", "iso_1_code": null, "iso_3_code": "pty", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3651", + "scripts": [], + "own_tokenizer": false }, { "name": "Muduga", "iso_1_code": null, "iso_3_code": "udg", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3652", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumbaran", "iso_1_code": null, "iso_3_code": "wkb", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3653", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalanadi", "iso_1_code": null, "iso_3_code": "wkl", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3654", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunduvadi", "iso_1_code": null, "iso_3_code": "wku", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3655", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamil-Kannada", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kannada", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Badaga", "iso_1_code": null, "iso_3_code": "bfq", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3658", + "scripts": [], + "own_tokenizer": false }, { "name": "Holiya", "iso_1_code": null, "iso_3_code": "hoy", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3659", + "scripts": [], + "own_tokenizer": false }, { "name": "Kannada", "iso_1_code": "kn", "iso_3_code": "kan", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3660", + "scripts": [ + "Latn", + "Knda" + ], + "own_tokenizer": true }, { "name": "Urali", "iso_1_code": null, "iso_3_code": "url", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3661", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3657", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamil-Kodagu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kodagu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kodava", "iso_1_code": null, "iso_3_code": "kfa", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3664", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumba, Kannada", "iso_1_code": null, "iso_3_code": "kfi", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3665", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumba, Mullu", "iso_1_code": null, "iso_3_code": "kpb", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3666", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumba, Alu", "iso_1_code": null, "iso_3_code": "xua", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3667", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumba, Jennu", "iso_1_code": null, "iso_3_code": "xuj", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3663", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamil-Malayalam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Mannan", "iso_1_code": null, "iso_3_code": "mjv", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3670", + "scripts": [], + "own_tokenizer": false }, { "name": "Malayalam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Aranadan", "iso_1_code": null, "iso_3_code": "aaf", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3672", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadar", "iso_1_code": null, "iso_3_code": "kej", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3673", + "scripts": [], + "own_tokenizer": false }, { "name": "Malayalam", "iso_1_code": "ml", "iso_3_code": "mal", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3674", + "scripts": [ + "Latn", + "Mlym" + ], + "own_tokenizer": true }, { "name": "Malapandaram", "iso_1_code": null, "iso_3_code": "mjp", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3675", + "scripts": [], + "own_tokenizer": false }, { "name": "Malaryan", "iso_1_code": null, "iso_3_code": "mjq", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3676", + "scripts": [], + "own_tokenizer": false }, { "name": "Malavedan", "iso_1_code": null, "iso_3_code": "mjr", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3677", + "scripts": [], + "own_tokenizer": false }, { "name": "Paliyan", "iso_1_code": null, "iso_3_code": "pcf", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3678", + "scripts": [], + "own_tokenizer": false }, { "name": "Paniya", "iso_1_code": null, "iso_3_code": "pcg", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3679", + "scripts": [], + "own_tokenizer": false }, { "name": "Ravula", "iso_1_code": null, "iso_3_code": "yea", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3680", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3671", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" + "tokenizers": { + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eravallan", "iso_1_code": null, "iso_3_code": "era", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3682", + "scripts": [], + "own_tokenizer": false }, { "name": "Irula", "iso_1_code": null, "iso_3_code": "iru", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3683", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaikadi", "iso_1_code": null, "iso_3_code": "kep", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3684", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanikkaran", "iso_1_code": null, "iso_3_code": "kev", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3685", + "scripts": [], + "own_tokenizer": false }, { "name": "Muthuvan", "iso_1_code": null, "iso_3_code": "muv", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3686", + "scripts": [], + "own_tokenizer": false }, { "name": "Sholaga", "iso_1_code": null, "iso_3_code": "sle", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3687", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamil", "iso_1_code": "ta", "iso_3_code": "tam", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" + "tokenizers": { + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3688", + "scripts": [ + "Taml", + "Latn" + ], + "own_tokenizer": true }, { "name": "Kurumba, Betta", "iso_1_code": null, "iso_3_code": "xub", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3689", + "scripts": [], + "own_tokenizer": false }, { "name": "Yerukula", "iso_1_code": null, "iso_3_code": "yeu", - "tokenizer": { - "name": "tamil", - "tokenizer": "SpaCyTokenizer(\"ta\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3690", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3681", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3669", + "scripts": [], + "own_tokenizer": false }, { "name": "Toda-Kota", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kota", "iso_1_code": null, "iso_3_code": "kfe", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3692", + "scripts": [], + "own_tokenizer": false }, { "name": "Toda", "iso_1_code": null, "iso_3_code": "tcx", - "tokenizer": { - "name": "malayalam", - "tokenizer": "SpaCyTokenizer(\"ml\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3693", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3691", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3662", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chetti, Wayanad", "iso_1_code": null, "iso_3_code": "ctt", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3695", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3694", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3656", + "scripts": [], + "own_tokenizer": false }, { "name": "Tulu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bellari", "iso_1_code": null, "iso_3_code": "brw", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3697", + "scripts": [], + "own_tokenizer": false }, { "name": "Kudiya", "iso_1_code": null, "iso_3_code": "kfg", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3698", + "scripts": [], + "own_tokenizer": false }, { "name": "Tulu", "iso_1_code": null, "iso_3_code": "tcy", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3699", + "scripts": [ + "Knda" + ], + "own_tokenizer": false }, { "name": "Koraga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koraga, Korra", "iso_1_code": null, "iso_3_code": "kfd", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3701", + "scripts": [], + "own_tokenizer": false }, { "name": "Koraga, Mudu", "iso_1_code": null, "iso_3_code": "vmd", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3702", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3700", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3696", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mala Malasar", "iso_1_code": null, "iso_3_code": "ima", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3704", + "scripts": [], + "own_tokenizer": false }, { "name": "Thachanadan", "iso_1_code": null, "iso_3_code": "thn", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3705", + "scripts": [], + "own_tokenizer": false }, { "name": "Ullatan", "iso_1_code": null, "iso_3_code": "ull", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3706", + "scripts": [], + "own_tokenizer": false }, { "name": "Malasar", "iso_1_code": null, "iso_3_code": "ymr", - "tokenizer": { - "name": "kannada", - "tokenizer": "SpaCyTokenizer(\"kn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3707", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3703", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3648", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Telu": { + "full_object": "SpaCyTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "scripts": [ + "Latn", + "Telu" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Knda": { + "full_object": "SpaCyTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "scripts": [ + "Latn", + "Knda" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Mlym": { + "full_object": "SpaCyTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "scripts": [ + "Latn", + "Mlym" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Taml": { + "full_object": "SpaCyTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "scripts": [ + "Taml", + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Allar", "iso_1_code": null, "iso_3_code": "all", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3709", + "scripts": [], + "own_tokenizer": false }, { "name": "Bharia", "iso_1_code": null, "iso_3_code": "bha", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3710", + "scripts": [], + "own_tokenizer": false }, { "name": "Malankuravan", "iso_1_code": null, "iso_3_code": "mjo", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3711", + "scripts": [], + "own_tokenizer": false }, { "name": "Pattapu", "iso_1_code": null, "iso_3_code": "ptq", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3712", + "scripts": [], + "own_tokenizer": false }, { "name": "Vishavan", "iso_1_code": null, "iso_3_code": "vis", - "tokenizer": { - "name": "telugu", - "tokenizer": "SpaCyTokenizer(\"te\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3713", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3708", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3601", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/East Bird\342\200\231s Head-Sentani.json" "b/data/East Bird\342\200\231s Head-Sentani.json" index fc2888bc503c7054a69dca983d7ab5a1bfc9aa05..e9654bd51e92afdc797c0256ea34ded19942d0e6 100644 --- "a/data/East Bird\342\200\231s Head-Sentani.json" +++ "b/data/East Bird\342\200\231s Head-Sentani.json" @@ -2,135 +2,173 @@ "name": "East Bird\u2019s Head-Sentani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burmeso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burmeso", "iso_1_code": null, "iso_3_code": "bzu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3716", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3715", + "scripts": [], + "own_tokenizer": false }, { "name": "East Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mantion", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sougb", "iso_1_code": null, "iso_3_code": "mnx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3719", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3718", + "scripts": [], + "own_tokenizer": false }, { "name": "Meax", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Meyah", "iso_1_code": null, "iso_3_code": "mej", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3721", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moskona", "iso_1_code": null, "iso_3_code": "mtj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3722", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3720", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3717", + "scripts": [], + "own_tokenizer": false }, { "name": "Sentani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Demta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sowari", "iso_1_code": null, "iso_3_code": "dmy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3725", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3724", + "scripts": [], + "own_tokenizer": false }, { "name": "Sentani Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nafri", "iso_1_code": null, "iso_3_code": "nxx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3727", + "scripts": [], + "own_tokenizer": false }, { "name": "Sentani", "iso_1_code": null, "iso_3_code": "set", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3728", + "scripts": [], + "own_tokenizer": false }, { "name": "Tabla", "iso_1_code": null, "iso_3_code": "tnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3729", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3726", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3723", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3714", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/East Geelvink Bay.json b/data/East Geelvink Bay.json index fb5cb0ce7432154de321691bf441cc2b0fb6a910..1ddc32546c3e463991bb3c5096f697a765893bdd 100644 --- a/data/East Geelvink Bay.json +++ b/data/East Geelvink Bay.json @@ -2,113 +2,143 @@ "name": "East Geelvink Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anasi", "iso_1_code": null, "iso_3_code": "bpo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3731", + "scripts": [], + "own_tokenizer": false }, { "name": "Barapasi", "iso_1_code": null, "iso_3_code": "brp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3732", + "scripts": [], + "own_tokenizer": false }, { "name": "Burate", "iso_1_code": null, "iso_3_code": "bti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3733", + "scripts": [], + "own_tokenizer": false }, { "name": "Kehu", "iso_1_code": null, "iso_3_code": "khh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3734", + "scripts": [], + "own_tokenizer": false }, { "name": "Kofei", "iso_1_code": null, "iso_3_code": "kpi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3735", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisa", "iso_1_code": null, "iso_3_code": "njs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3736", + "scripts": [], + "own_tokenizer": false }, { "name": "Sauri", "iso_1_code": null, "iso_3_code": "srt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3737", + "scripts": [], + "own_tokenizer": false }, { "name": "Tefaro", "iso_1_code": null, "iso_3_code": "tfo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3738", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunggare", "iso_1_code": null, "iso_3_code": "trt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3739", + "scripts": [], + "own_tokenizer": false }, { "name": "Woria", "iso_1_code": null, "iso_3_code": "wor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3740", + "scripts": [], + "own_tokenizer": false }, { "name": "Bauzi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bauzi", "iso_1_code": null, "iso_3_code": "bvz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3742", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Demisa", "iso_1_code": null, "iso_3_code": "dei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3743", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3741", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3730", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/East New Britain.json b/data/East New Britain.json index b0c4c51f168390d3d97d9720b701067499646385..03c7037618d0ec00f4a35f529df048f3d9cd9303 100644 --- a/data/East New Britain.json +++ b/data/East New Britain.json @@ -2,82 +2,104 @@ "name": "East New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baining", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Qaqet", "iso_1_code": null, "iso_3_code": "byx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3746", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kairak", "iso_1_code": null, "iso_3_code": "ckr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3747", + "scripts": [], + "own_tokenizer": false }, { "name": "Mali", "iso_1_code": null, "iso_3_code": "gcc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3748", + "scripts": [], + "own_tokenizer": false }, { "name": "Simbali", "iso_1_code": null, "iso_3_code": "smg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3749", + "scripts": [], + "own_tokenizer": false }, { "name": "Ura", "iso_1_code": null, "iso_3_code": "uro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3750", + "scripts": [], + "own_tokenizer": false }, { "name": "Makolkol", "iso_1_code": null, "iso_3_code": "zmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3751", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3745", + "scripts": [], + "own_tokenizer": false }, { "name": "Taulil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tulil", "iso_1_code": null, "iso_3_code": "tuh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3753", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3752", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3744", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Eastern Trans-Fly.json b/data/Eastern Trans-Fly.json index 62baad0c0b7c0211c9ef3a9e2d6184e53f1c258e..5dde51991a5b0049df1b8b91efa8adaa8102cdc0 100644 --- a/data/Eastern Trans-Fly.json +++ b/data/Eastern Trans-Fly.json @@ -2,40 +2,54 @@ "name": "Eastern Trans-Fly", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bine", "iso_1_code": null, "iso_3_code": "bon", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3755", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wipi", "iso_1_code": null, "iso_3_code": "gdr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3756", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gizrra", "iso_1_code": null, "iso_3_code": "tof", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3757", + "scripts": [], + "own_tokenizer": false }, { "name": "Meriam Mir", "iso_1_code": null, "iso_3_code": "ulk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3758", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3754", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Eskimo-Aleut.json b/data/Eskimo-Aleut.json index 77cbfc4d6ab3c23b4173c8146aaca5ec6b268c60..56f5ad0284b4a55f5bae0cc36612fcdd7b6986bb 100644 --- a/data/Eskimo-Aleut.json +++ b/data/Eskimo-Aleut.json @@ -2,141 +2,189 @@ "name": "Eskimo-Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aleut", "iso_1_code": null, "iso_3_code": "ale", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3761", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3760", + "scripts": [], + "own_tokenizer": false }, { "name": "Eskimo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Inuit-Inupiaq", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Inupiatun, North Alaskan", "iso_1_code": "ik", "iso_3_code": "esi", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3764", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Inupiatun, Northwest Alaska", "iso_1_code": "ik", "iso_3_code": "esk", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3765", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Inuktitut, Eastern Canadian", "iso_1_code": "iu", "iso_3_code": "ike", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3766", + "scripts": [ + "Cans" + ], + "own_tokenizer": false }, { "name": "Inuinnaqtun", "iso_1_code": "iu", "iso_3_code": "ikt", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3767", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Greenlandic", "iso_1_code": "kl", "iso_3_code": "kal", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3768", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3763", + "scripts": [], + "own_tokenizer": false }, { "name": "Yupik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yupik, Saint Lawrence Island", "iso_1_code": null, "iso_3_code": "ess", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3770", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yupik, Naukan", "iso_1_code": null, "iso_3_code": "ynk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3771", + "scripts": [], + "own_tokenizer": false }, { "name": "Yupik, Sirenik", "iso_1_code": null, "iso_3_code": "ysr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3772", + "scripts": [], + "own_tokenizer": false }, { "name": "Alaskan Yupik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yupik, Pacific Gulf", "iso_1_code": null, "iso_3_code": "ems", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3774", + "scripts": [], + "own_tokenizer": false }, { "name": "Yupik, Central", "iso_1_code": null, "iso_3_code": "esu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3775", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3773", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3769", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3762", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3759", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Eyak-Athabaskan.json b/data/Eyak-Athabaskan.json index d81c8126f95d2451ffb4702c670a1c01c39f82a4..cdb5b25f480d5cd63fbf75b93541f5910f359d1c 100644 --- a/data/Eyak-Athabaskan.json +++ b/data/Eyak-Athabaskan.json @@ -2,510 +2,648 @@ "name": "Eyak-Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eyak", "iso_1_code": null, "iso_3_code": "eya", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3777", + "scripts": [], + "own_tokenizer": false }, { "name": "Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apachean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Navajo", "iso_1_code": "nv", "iso_3_code": "nav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3780", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Apache", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apache, Jicarilla", "iso_1_code": null, "iso_3_code": "apj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3782", + "scripts": [], + "own_tokenizer": false }, { "name": "Apache, Kiowa", "iso_1_code": null, "iso_3_code": "apk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3783", + "scripts": [], + "own_tokenizer": false }, { "name": "Apache, Lipan", "iso_1_code": null, "iso_3_code": "apl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3784", + "scripts": [], + "own_tokenizer": false }, { "name": "Apache, Mescalero-Chiricahua", "iso_1_code": null, "iso_3_code": "apm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3785", + "scripts": [], + "own_tokenizer": false }, { "name": "Apache, Western", "iso_1_code": null, "iso_3_code": "apw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3786", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3781", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3779", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ahtena", "iso_1_code": null, "iso_3_code": "aht", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3788", + "scripts": [], + "own_tokenizer": false }, { "name": "Babine", "iso_1_code": null, "iso_3_code": "bcr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3789", + "scripts": [], + "own_tokenizer": false }, { "name": "Beaver", "iso_1_code": null, "iso_3_code": "bea", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3790", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dene", "iso_1_code": null, "iso_3_code": "chp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3791", + "scripts": [], + "own_tokenizer": false }, { "name": "Chilcotin", "iso_1_code": null, "iso_3_code": "clc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3792", + "scripts": [], + "own_tokenizer": false }, { "name": "Tlicho", "iso_1_code": null, "iso_3_code": "dgr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3793", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gwich\u2019in", "iso_1_code": null, "iso_3_code": "gwi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3794", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Han", "iso_1_code": null, "iso_3_code": "haa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3795", + "scripts": [], + "own_tokenizer": false }, { "name": "Holikachuk", "iso_1_code": null, "iso_3_code": "hoi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3796", + "scripts": [], + "own_tokenizer": false }, { "name": "Deg Xinag", "iso_1_code": null, "iso_3_code": "ing", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3797", + "scripts": [], + "own_tokenizer": false }, { "name": "Koyukon", "iso_1_code": null, "iso_3_code": "koy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3798", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuskokwim, Upper", "iso_1_code": null, "iso_3_code": "kuu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3799", + "scripts": [], + "own_tokenizer": false }, { "name": "Sekani", "iso_1_code": null, "iso_3_code": "sek", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3800", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarsi", "iso_1_code": null, "iso_3_code": "srs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3801", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanana, Lower", "iso_1_code": null, "iso_3_code": "taa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3802", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanana, Upper", "iso_1_code": null, "iso_3_code": "tau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3803", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanacross", "iso_1_code": null, "iso_3_code": "tcb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3804", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanaina", "iso_1_code": null, "iso_3_code": "tfn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3805", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsetsaut", "iso_1_code": null, "iso_3_code": "txc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3806", + "scripts": [], + "own_tokenizer": false }, { "name": "Carrier", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Carrier, Southern", "iso_1_code": null, "iso_3_code": "caf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3808", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Carrier", "iso_1_code": null, "iso_3_code": "crx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3809", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3807", + "scripts": [], + "own_tokenizer": false }, { "name": "Slavey-Hare", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Slavey, North", "iso_1_code": null, "iso_3_code": "scs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3811", + "scripts": [], + "own_tokenizer": false }, { "name": "Slavey, South", "iso_1_code": null, "iso_3_code": "xsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3812", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3810", + "scripts": [], + "own_tokenizer": false }, { "name": "Tahltan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaska", "iso_1_code": null, "iso_3_code": "kkz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3814", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagish", "iso_1_code": null, "iso_3_code": "tgx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3815", + "scripts": [], + "own_tokenizer": false }, { "name": "Tahltan", "iso_1_code": null, "iso_3_code": "tht", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3816", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3813", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuchone", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tutchone, Southern", "iso_1_code": null, "iso_3_code": "tce", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3818", + "scripts": [], + "own_tokenizer": false }, { "name": "Tutchone, Northern", "iso_1_code": null, "iso_3_code": "ttm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3819", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3817", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3787", + "scripts": [], + "own_tokenizer": false }, { "name": "Pacific Coast Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kwalhioqua-Tlatskanai", "iso_1_code": null, "iso_3_code": "qwt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3821", + "scripts": [], + "own_tokenizer": false }, { "name": "California Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hupa", "iso_1_code": null, "iso_3_code": "hup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3823", + "scripts": [], + "own_tokenizer": false }, { "name": "Kato", "iso_1_code": null, "iso_3_code": "ktw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3824", + "scripts": [], + "own_tokenizer": false }, { "name": "Mattole", "iso_1_code": null, "iso_3_code": "mvb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3825", + "scripts": [], + "own_tokenizer": false }, { "name": "Wailaki", "iso_1_code": null, "iso_3_code": "wlk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3826", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3822", + "scripts": [], + "own_tokenizer": false }, { "name": "Oregon Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Galice", "iso_1_code": null, "iso_3_code": "gce", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3828", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Umpqua", "iso_1_code": null, "iso_3_code": "xup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3829", + "scripts": [], + "own_tokenizer": false }, { "name": "Tolowa-Chetco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chetco", "iso_1_code": null, "iso_3_code": "ctc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3831", + "scripts": [], + "own_tokenizer": false }, { "name": "Tolowa", "iso_1_code": null, "iso_3_code": "tol", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3832", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3830", + "scripts": [], + "own_tokenizer": false }, { "name": "Tututni-Chasta Costa-Coquille", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Coquille", "iso_1_code": null, "iso_3_code": "coq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3834", + "scripts": [], + "own_tokenizer": false }, { "name": "Tututni", "iso_1_code": null, "iso_3_code": "tuu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3835", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3833", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3827", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3820", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3778", + "scripts": [], + "own_tokenizer": false }, { "name": "Tlingit", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tlingit", "iso_1_code": null, "iso_3_code": "tli", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3837", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3836", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3776", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Fas.json b/data/Fas.json index dc65d3d0e5de7e5f3bdb5a556eb9e9a68cb5f1ce..d1e3afd11b986f365b5d4fb784809b33d392c65e 100644 --- a/data/Fas.json +++ b/data/Fas.json @@ -2,24 +2,30 @@ "name": "Fas", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baibai", "iso_1_code": null, "iso_3_code": "bbf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3839", + "scripts": [], + "own_tokenizer": false }, { "name": "Momu", "iso_1_code": null, "iso_3_code": "fqs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3840", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3838", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Guajiboan.json b/data/Guajiboan.json index 26faed2b6903f067eac223f1b29e180f6b3192d6..f079c78d771bbe17eb78863747020793008ee8d7 100644 --- a/data/Guajiboan.json +++ b/data/Guajiboan.json @@ -2,57 +2,77 @@ "name": "Guajiboan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cuiba", "iso_1_code": null, "iso_3_code": "cui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3842", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guayabero", "iso_1_code": null, "iso_3_code": "guo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3843", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guajibo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Playero", "iso_1_code": null, "iso_3_code": "gob", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3845", + "scripts": [], + "own_tokenizer": false }, { "name": "Guahibo", "iso_1_code": null, "iso_3_code": "guh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3846", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Macagu\u00e1n", "iso_1_code": null, "iso_3_code": "mbn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3847", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3844", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3841", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Guaykuruan.json b/data/Guaykuruan.json index 1396dfdf0f1c586b341d9a91bbdcda3c0ac92c8d..f197e498db2b4abf80e38247380b5a0a4a108984 100644 --- a/data/Guaykuruan.json +++ b/data/Guaykuruan.json @@ -2,66 +2,90 @@ "name": "Guaykuruan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guaykur\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abipon", "iso_1_code": null, "iso_3_code": "axb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3850", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadiw\u00e9u", "iso_1_code": null, "iso_3_code": "kbc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3851", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3849", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mocov\u00ed", "iso_1_code": null, "iso_3_code": "moc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3853", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pilag\u00e1", "iso_1_code": null, "iso_3_code": "plg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3854", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Toba", "iso_1_code": null, "iso_3_code": "tob", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3855", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3852", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3848", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Gum.json b/data/Gum.json index 329c2d272030fd98d9b66a7efe58619e4bad3960..6978c139a98e2404815b76ea649d1056221c6b04 100644 --- a/data/Gum.json +++ b/data/Gum.json @@ -2,7 +2,9 @@ "name": "Gum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3856", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Haida.json b/data/Haida.json index 5bad536b7b595b1f62aad3babbb9f33aab29c96c..f2055ec455656050da85d82d5cf0c6fefa4f6041 100644 --- a/data/Haida.json +++ b/data/Haida.json @@ -2,24 +2,30 @@ "name": "Haida", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Haida, Southern", "iso_1_code": null, "iso_3_code": "hax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3858", + "scripts": [], + "own_tokenizer": false }, { "name": "Haida, Northern", "iso_1_code": null, "iso_3_code": "hdn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3859", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3857", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/Har\303\241kmbut.json" "b/data/Har\303\241kmbut.json" index e1c1ae5a3afd0c2d3d026114357a4d4f4489dacb..3835a5367ea7e3f78944531fead1f0145fbb5204 100644 --- "a/data/Har\303\241kmbut.json" +++ "b/data/Har\303\241kmbut.json" @@ -2,24 +2,32 @@ "name": "Har\u00e1kmbut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amarakaeri", "iso_1_code": null, "iso_3_code": "amr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3861", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Huachipaeri", "iso_1_code": null, "iso_3_code": "hug", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3862", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3860", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Hmong-Mien.json b/data/Hmong-Mien.json index d7e79166b3b1110ce5bdca11269d75903fe49b59..04d2cb2a28e603fae657bb1bc31a3b784ff42391 100644 --- a/data/Hmong-Mien.json +++ b/data/Hmong-Mien.json @@ -2,419 +2,527 @@ "name": "Hmong-Mien", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hmongic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bunu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bunu, Younuo", "iso_1_code": null, "iso_3_code": "buh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3866", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunu, Wunai", "iso_1_code": null, "iso_3_code": "bwn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3867", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunu, Bu-Nao", "iso_1_code": null, "iso_3_code": "bwx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3868", + "scripts": [], + "own_tokenizer": false }, { "name": "Bunu, Jiongnai", "iso_1_code": null, "iso_3_code": "pnu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3869", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3865", + "scripts": [], + "own_tokenizer": false }, { "name": "Chuanqiandian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miao, Chuanqiandian Cluster", "iso_1_code": null, "iso_3_code": "cqd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3871", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Southern Mashan", "iso_1_code": null, "iso_3_code": "hma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3872", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Central Huishui", "iso_1_code": null, "iso_3_code": "hmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3873", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Large Flowery", "iso_1_code": null, "iso_3_code": "hmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3874", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Eastern Huishui", "iso_1_code": null, "iso_3_code": "hme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3875", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmong Don", "iso_1_code": null, "iso_3_code": "hmf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3876", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Southwestern Guiyang", "iso_1_code": null, "iso_3_code": "hmg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3877", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Southwestern Huishui", "iso_1_code": null, "iso_3_code": "hmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3878", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Northern Huishui", "iso_1_code": null, "iso_3_code": "hmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3879", + "scripts": [], + "own_tokenizer": false }, { "name": "Ge", "iso_1_code": null, "iso_3_code": "hmj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3880", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Luopohe", "iso_1_code": null, "iso_3_code": "hml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3881", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Central Mashan", "iso_1_code": null, "iso_3_code": "hmm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3882", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Northern Mashan", "iso_1_code": null, "iso_3_code": "hmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3883", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmong D\u00f4", "iso_1_code": null, "iso_3_code": "hmv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3884", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Western Mashan", "iso_1_code": null, "iso_3_code": "hmw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3885", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Southern Guiyang", "iso_1_code": null, "iso_3_code": "hmy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3886", + "scripts": [], + "own_tokenizer": false }, { "name": "Sinicized Miao", "iso_1_code": null, "iso_3_code": "hmz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3887", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmong Njua", "iso_1_code": null, "iso_3_code": "hnj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3888", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Miao, Horned", "iso_1_code": null, "iso_3_code": "hrm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3889", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Northern Guiyang", "iso_1_code": null, "iso_3_code": "huj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3890", + "scripts": [], + "own_tokenizer": false }, { "name": "Hmong Daw", "iso_1_code": null, "iso_3_code": "mww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3891", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Miao, Small Flowery", "iso_1_code": null, "iso_3_code": "sfm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3892", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3870", + "scripts": [], + "own_tokenizer": false }, { "name": "Pa-hng", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pa-Hng", "iso_1_code": null, "iso_3_code": "pha", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3894", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3893", + "scripts": [], + "own_tokenizer": false }, { "name": "Qiandong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miao, Northern Qiandong", "iso_1_code": null, "iso_3_code": "hea", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3896", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Eastern Qiandong", "iso_1_code": null, "iso_3_code": "hmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3897", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Southern Qiandong", "iso_1_code": null, "iso_3_code": "hms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3898", + "scripts": [], + "own_tokenizer": false }, { "name": "N\u00e1-Meo", "iso_1_code": null, "iso_3_code": "neo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3899", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3895", + "scripts": [], + "own_tokenizer": false }, { "name": "Xiangxi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miao, Western Xiangxi", "iso_1_code": null, "iso_3_code": "mmr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3901", + "scripts": [], + "own_tokenizer": false }, { "name": "Miao, Eastern Xiangxi", "iso_1_code": null, "iso_3_code": "muq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3902", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3900", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3864", + "scripts": [], + "own_tokenizer": false }, { "name": "Ho Nte", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "She", "iso_1_code": null, "iso_3_code": "shx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3904", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3903", + "scripts": [], + "own_tokenizer": false }, { "name": "Mienic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biao-Jiao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biao-Jiao Mien", "iso_1_code": null, "iso_3_code": "bje", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3907", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3906", + "scripts": [], + "own_tokenizer": false }, { "name": "Mian-Jin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biao Mon", "iso_1_code": null, "iso_3_code": "bmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3909", + "scripts": [], + "own_tokenizer": false }, { "name": "Iu Mien", "iso_1_code": null, "iso_3_code": "ium", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3910", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kim Mun", "iso_1_code": null, "iso_3_code": "mji", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3911", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3908", + "scripts": [], + "own_tokenizer": false }, { "name": "Zaomin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dzao Min", "iso_1_code": null, "iso_3_code": "bpn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3913", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3912", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3905", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3863", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Huavean.json b/data/Huavean.json index b8dee332fa0373ef189f327f3206001bcaa4bdbb..c368d20c4a8f1f036e2f850b5d872af30af43bec 100644 --- a/data/Huavean.json +++ b/data/Huavean.json @@ -2,40 +2,52 @@ "name": "Huavean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huave, San Francisco del Mar", "iso_1_code": null, "iso_3_code": "hue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3915", + "scripts": [], + "own_tokenizer": false }, { "name": "Huave, San Mateo del Mar", "iso_1_code": null, "iso_3_code": "huv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3916", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Huave, San Dionisio del Mar", "iso_1_code": null, "iso_3_code": "hve", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3917", + "scripts": [], + "own_tokenizer": false }, { "name": "Huave, Santa Mar\u00eda del Mar", "iso_1_code": null, "iso_3_code": "hvv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3918", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3914", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Indo-European.json b/data/Indo-European.json index 990705b7b9382a181b0e3761faa64bc162d4f580..7f173ebd578b7c49ad58d8557c4c702ba6ceec02 100644 --- a/data/Indo-European.json +++ b/data/Indo-European.json @@ -2,6852 +2,26738 @@ "name": "Indo-European", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Albanian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Gheg", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Albanian, Gheg", "iso_1_code": "sq", "iso_3_code": "aln", - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3922", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3921", + "scripts": [], + "own_tokenizer": false }, { "name": "Tosk", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Albanian, Arb\u00ebresh\u00eb", "iso_1_code": "sq", "iso_3_code": "aae", - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3924", + "scripts": [], + "own_tokenizer": true }, { "name": "Albanian, Arvanitika", "iso_1_code": "sq", "iso_3_code": "aat", - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3925", + "scripts": [], + "own_tokenizer": true }, { "name": "Albanian, Tosk", "iso_1_code": "sq", "iso_3_code": "als", - "tokenizer": { - "name": "albanian", - "tokenizer": "SpaCyTokenizer(\"sq\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sq\")", + "original_lang_name": "albanian", + "original_lang_code": "sqi", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3926", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3923", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3920", + "scripts": [], + "own_tokenizer": false }, { "name": "Armenian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "armenian", - "tokenizer": "SpaCyTokenizer(\"hy\")" + "tokenizers": { + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Armenian", "iso_1_code": "hy", "iso_3_code": "hye", - "tokenizer": { - "name": "armenian", - "tokenizer": "SpaCyTokenizer(\"hy\")" + "tokenizers": { + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3928", + "scripts": [ + "Armn" + ], + "own_tokenizer": true }, { "name": "Armenian, Western", "iso_1_code": null, "iso_3_code": "hyw", - "tokenizer": { - "name": "western_armenian", - "tokenizer": "StanzaTokenizer(\"hyw\")" + "tokenizers": { + "Armn": { + "full_object": "StanzaTokenizer(\"hyw\")", + "original_lang_name": "western_armenian", + "original_lang_code": "hyw", + "scripts": [ + "Armn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3929", + "scripts": [ + "Armn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3927", + "scripts": [], + "own_tokenizer": false }, { "name": "Balto-Slavic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Baltic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Lithuanian", "iso_1_code": "lt", "iso_3_code": "lit", - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3933", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Latgalian", "iso_1_code": "lv", "iso_3_code": "ltg", - "tokenizer": { - "name": "latvian", - "tokenizer": "SpaCyTokenizer(\"lv\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lv\")", + "original_lang_name": "latvian", + "original_lang_code": "lav", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3934", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Latvian, Standard", "iso_1_code": "lv", "iso_3_code": "lvs", - "tokenizer": { - "name": "latvian", - "tokenizer": "SpaCyTokenizer(\"lv\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lv\")", + "original_lang_name": "latvian", + "original_lang_code": "lav", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3935", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Samogitian", "iso_1_code": null, "iso_3_code": "sgs", - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3936", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zemgalian", "iso_1_code": null, "iso_3_code": "xzm", - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3937", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3932", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Prussian", "iso_1_code": null, "iso_3_code": "prg", - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3939", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sudovian", "iso_1_code": null, "iso_3_code": "xsv", - "tokenizer": { - "name": "lithuanian", - "tokenizer": "SpaCyTokenizer(\"lt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3940", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3938", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3931", + "scripts": [], + "own_tokenizer": false }, { "name": "Slavic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russian", - "tokenizer": "SpaCyTokenizer(\"ru\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russian", - "tokenizer": "SpaCyTokenizer(\"ru\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Belarusian", "iso_1_code": "be", "iso_3_code": "bel", - "tokenizer": { - "name": "belarusian", - "tokenizer": "StanzaTokenizer(\"be\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"be\")", + "original_lang_name": "belarusian", + "original_lang_code": "bel", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3943", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Ruthenian", "iso_1_code": null, "iso_3_code": "rsk", - "tokenizer": { - "name": "russian", - "tokenizer": "SpaCyTokenizer(\"ru\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3944", + "scripts": [], + "own_tokenizer": false }, { "name": "Rusyn", "iso_1_code": null, "iso_3_code": "rue", - "tokenizer": { - "name": "russian", - "tokenizer": "SpaCyTokenizer(\"ru\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3945", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Russian", "iso_1_code": "ru", "iso_3_code": "rus", - "tokenizer": { - "name": "russian", - "tokenizer": "SpaCyTokenizer(\"ru\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3946", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Ukrainian", "iso_1_code": "uk", "iso_3_code": "ukr", - "tokenizer": { - "name": "ukrainian", - "tokenizer": "SpaCyTokenizer(\"uk\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"uk\")", + "original_lang_name": "ukrainian", + "original_lang_code": "ukr", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3947", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3942", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bulgarian", - "tokenizer": "SpaCyTokenizer(\"bg\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"bg\")", + "original_lang_name": "bulgarian", + "original_lang_code": "bul", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bulgarian", - "tokenizer": "SpaCyTokenizer(\"bg\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"bg\")", + "original_lang_name": "bulgarian", + "original_lang_code": "bul", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bulgarian", "iso_1_code": "bg", "iso_3_code": "bul", - "tokenizer": { - "name": "bulgarian", - "tokenizer": "SpaCyTokenizer(\"bg\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"bg\")", + "original_lang_name": "bulgarian", + "original_lang_code": "bul", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3950", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Slavonic, Church", "iso_1_code": "cu", "iso_3_code": "chu", - "tokenizer": { - "name": "old_bulgarian", - "tokenizer": "StanzaTokenizer(\"cu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"cu\")", + "original_lang_name": "old_bulgarian", + "original_lang_code": "chu", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3951", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Macedonian", "iso_1_code": "mk", "iso_3_code": "mkd", - "tokenizer": { - "name": "macedonian", - "tokenizer": "SpaCyTokenizer(\"mk\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"mk\")", + "original_lang_name": "macedonian", + "original_lang_code": "mkd", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3952", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3949", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "serbocroatian", - "tokenizer": "SpaCyTokenizer(\"sr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbian", + "original_lang_code": "srp", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bosnian", "iso_1_code": "bs", "iso_3_code": "bos", - "tokenizer": { - "name": "serbocroatian", - "tokenizer": "SpaCyTokenizer(\"sr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3954", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Chakavian", "iso_1_code": null, "iso_3_code": "ckm", - "tokenizer": { - "name": "serbocroatian", - "tokenizer": "SpaCyTokenizer(\"sr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3955", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Montenegrin", "iso_1_code": "sh", "iso_3_code": "cnr", - "tokenizer": { - "name": "serbocroatian", - "tokenizer": "SpaCyTokenizer(\"sr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbocroatian", + "original_lang_code": "hbs", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "3956", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Croatian", "iso_1_code": "hr", "iso_3_code": "hrv", - "tokenizer": { - "name": "croatian", - "tokenizer": "SpaCyTokenizer(\"hr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3957", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Slovene", "iso_1_code": "sl", "iso_3_code": "slv", - "tokenizer": { - "name": "slovenian", - "tokenizer": "SpaCyTokenizer(\"sl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sl\")", + "original_lang_name": "slovenian", + "original_lang_code": "slv", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3958", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Serbian", "iso_1_code": "sr", "iso_3_code": "srp", - "tokenizer": { - "name": "serbian", - "tokenizer": "SpaCyTokenizer(\"sr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbian", + "original_lang_code": "srp", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbian", + "original_lang_code": "srp", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3959", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Slavomolisano", "iso_1_code": null, "iso_3_code": "svm", - "tokenizer": { - "name": "serbocroatian", - "tokenizer": "SpaCyTokenizer(\"sr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3953", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3948", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "czech", - "tokenizer": "SpaCyTokenizer(\"cz\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Czech-Slovak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "czech", - "tokenizer": "SpaCyTokenizer(\"cz\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"cs\")", + "original_lang_name": "czech", + "original_lang_code": "ces", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Czech", "iso_1_code": "cs", "iso_3_code": "ces", - "tokenizer": { - "name": "czech", - "tokenizer": "SpaCyTokenizer(\"cz\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"cs\")", + "original_lang_name": "czech", + "original_lang_code": "ces", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3963", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Knaanic", "iso_1_code": null, "iso_3_code": "czk", - "tokenizer": { - "name": "czech", - "tokenizer": "SpaCyTokenizer(\"cz\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3964", + "scripts": [], + "own_tokenizer": false }, { "name": "Slovak", "iso_1_code": "sk", "iso_3_code": "slk", - "tokenizer": { - "name": "slovak", - "tokenizer": "SpaCyTokenizer(\"sk\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sk\")", + "original_lang_name": "slovak", + "original_lang_code": "slk", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3965", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3962", + "scripts": [], + "own_tokenizer": false }, { "name": "Lechitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "polish", - "tokenizer": "SpaCyTokenizer(\"pl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kashubian", "iso_1_code": null, "iso_3_code": "csb", - "tokenizer": { - "name": "polish", - "tokenizer": "SpaCyTokenizer(\"pl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3967", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Polish", "iso_1_code": "pl", "iso_3_code": "pol", - "tokenizer": { - "name": "polish", - "tokenizer": "SpaCyTokenizer(\"pl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3968", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Polabian", "iso_1_code": null, "iso_3_code": "pox", - "tokenizer": { - "name": "polish", - "tokenizer": "SpaCyTokenizer(\"pl\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3969", + "scripts": [], + "own_tokenizer": false }, { "name": "Silesian", "iso_1_code": null, "iso_3_code": "szl", - "tokenizer": { - "name": "polish", - "tokenizer": "SpaCyTokenizer(\"pl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3970", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "3966", + "scripts": [], + "own_tokenizer": false }, { "name": "Sorbian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "lower_sorbian", - "tokenizer": "SpaCyTokenizer(\"dsb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hsb\")", + "original_lang_name": "upper_sorbian", + "original_lang_code": "hsb", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Sorbian, Lower", "iso_1_code": null, "iso_3_code": "dsb", - "tokenizer": { - "name": "lower_sorbian", - "tokenizer": "SpaCyTokenizer(\"dsb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"dsb\")", + "original_lang_name": "lower_sorbian", + "original_lang_code": "dsb", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3972", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Sorbian, Upper", "iso_1_code": null, "iso_3_code": "hsb", - "tokenizer": { - "name": "upper_sorbian", - "tokenizer": "SpaCyTokenizer(\"hsb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hsb\")", + "original_lang_name": "upper_sorbian", + "original_lang_code": "hsb", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3973", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3971", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3961", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3941", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3930", + "scripts": [], + "own_tokenizer": false }, { "name": "Celtic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "irish", - "tokenizer": "SpaCyTokenizer(\"ga\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Insular", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "irish", - "tokenizer": "SpaCyTokenizer(\"ga\")" - }, - "source": "bottom", - "children": [ - { - "name": "Brythonic", - "iso_1_code": null, - "iso_3_code": null, - "tokenizer": { - "name": "welsh", - "tokenizer": "StanzaTokenizer(\"cy\")" - }, - "source": "bottom", - "children": [ - { + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [ + { + "name": "Brythonic", + "iso_1_code": null, + "iso_3_code": null, + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [ + { "name": "Breton", "iso_1_code": "br", "iso_3_code": "bre", - "tokenizer": { - "name": "welsh", - "tokenizer": "StanzaTokenizer(\"cy\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3977", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cornish", "iso_1_code": "kw", "iso_3_code": "cor", - "tokenizer": { - "name": "welsh", - "tokenizer": "StanzaTokenizer(\"cy\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "3978", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Welsh", "iso_1_code": "cy", "iso_3_code": "cym", - "tokenizer": { - "name": "welsh", - "tokenizer": "StanzaTokenizer(\"cy\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3979", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3976", + "scripts": [], + "own_tokenizer": false }, { "name": "Goidelic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "irish", - "tokenizer": "SpaCyTokenizer(\"ga\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ga\")", + "original_lang_name": "irish", + "original_lang_code": "gle", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Scottish Gaelic", "iso_1_code": "gd", "iso_3_code": "gla", - "tokenizer": { - "name": "gaelic", - "tokenizer": "StanzaTokenizer(\"gd\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"gd\")", + "original_lang_name": "gaelic", + "original_lang_code": "gla", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3981", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Irish", "iso_1_code": "ga", "iso_3_code": "gle", - "tokenizer": { - "name": "irish", - "tokenizer": "SpaCyTokenizer(\"ga\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ga\")", + "original_lang_name": "irish", + "original_lang_code": "gle", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3982", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Manx", "iso_1_code": "gv", "iso_3_code": "glv", - "tokenizer": { - "name": "manx", - "tokenizer": "StanzaTokenizer(\"gv\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"gv\")", + "original_lang_name": "manx", + "original_lang_code": "glv", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3983", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3980", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3975", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3974", + "scripts": [], + "own_tokenizer": false }, { "name": "Germanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "East Scandinavian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "\u00d6vdalian", "iso_1_code": null, "iso_3_code": "ovd", - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3987", + "scripts": [], + "own_tokenizer": false }, { "name": "Danish-Swedish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Danish-Bokmal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Norwegian", "iso_1_code": "no", "iso_3_code": "nor", - "tokenizer": { - "name": "norwegian", - "tokenizer": "SpaCyTokenizer(\"nb\")" - }, - "source": "own", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3990", + "scripts": [], + "own_tokenizer": true } - ] + ], + "node_i": "3989", + "scripts": [], + "own_tokenizer": false }, { "name": "Danish-Riksmal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "danish", - "tokenizer": "SpaCyTokenizer(\"da\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"da\")", + "original_lang_name": "danish", + "original_lang_code": "dan", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Danish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "danish", - "tokenizer": "SpaCyTokenizer(\"da\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"da\")", + "original_lang_name": "danish", + "original_lang_code": "dan", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Danish", "iso_1_code": "da", "iso_3_code": "dan", - "tokenizer": { - "name": "danish", - "tokenizer": "SpaCyTokenizer(\"da\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"da\")", + "original_lang_name": "danish", + "original_lang_code": "dan", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3993", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3991", + "scripts": [], + "own_tokenizer": false }, { "name": "Swedish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "swedish", - "tokenizer": "SpaCyTokenizer(\"sv\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Swedish", "iso_1_code": "sv", "iso_3_code": "swe", - "tokenizer": { - "name": "swedish", - "tokenizer": "SpaCyTokenizer(\"sv\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3995", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "3994", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3988", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3986", + "scripts": [], + "own_tokenizer": false }, { "name": "West Scandinavian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "faroese", - "tokenizer": "SpaCyTokenizer(\"fo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"is\")", + "original_lang_name": "icelandic", + "original_lang_code": "isl", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Faroese", "iso_1_code": "fo", "iso_3_code": "fao", - "tokenizer": { - "name": "faroese", - "tokenizer": "SpaCyTokenizer(\"fo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fo\")", + "original_lang_name": "faroese", + "original_lang_code": "fao", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3997", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Icelandic", "iso_1_code": "is", "iso_3_code": "isl", - "tokenizer": { - "name": "icelandic", - "tokenizer": "SpaCyTokenizer(\"is\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"is\")", + "original_lang_name": "icelandic", + "original_lang_code": "isl", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "3998", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Norn", "iso_1_code": null, "iso_3_code": "nrn", - "tokenizer": { - "name": "faroese", - "tokenizer": "SpaCyTokenizer(\"fo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "3999", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3996", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3985", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "English", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "English", "iso_1_code": "en", "iso_3_code": "eng", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4002", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Scots", "iso_1_code": null, "iso_3_code": "sco", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4003", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yola", "iso_1_code": null, "iso_3_code": "yol", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4004", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4001", + "scripts": [], + "own_tokenizer": false }, { "name": "Frisian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Frisian, Northern", "iso_1_code": null, "iso_3_code": "frr", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4006", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Frisian", "iso_1_code": "fy", "iso_3_code": "fry", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4007", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Saterfriesisch", "iso_1_code": null, "iso_3_code": "stq", - "tokenizer": { - "name": "english", - "tokenizer": "SpaCyTokenizer(\"en\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4008", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4005", + "scripts": [], + "own_tokenizer": false }, { "name": "High German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Hunsrik", "iso_1_code": null, "iso_3_code": "hrx", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4011", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "East Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "German, Standard", "iso_1_code": "de", "iso_3_code": "deu", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4014", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Silesian, Lower", "iso_1_code": null, "iso_3_code": "sli", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4015", + "scripts": [], + "own_tokenizer": false }, { "name": "Saxon, Upper", "iso_1_code": null, "iso_3_code": "sxu", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4016", + "scripts": [], + "own_tokenizer": false }, { "name": "Wymysorys", "iso_1_code": null, "iso_3_code": "wym", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4017", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4013", + "scripts": [], + "own_tokenizer": false }, { "name": "West Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ripuarian", "iso_1_code": null, "iso_3_code": "ksh", - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4019", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "German, Pennsylvania", "iso_1_code": null, "iso_3_code": "pdc", - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4020", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Palatinate Franconian", "iso_1_code": null, "iso_3_code": "pfl", - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4021", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moselle Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Luxembourgish", "iso_1_code": "lb", "iso_3_code": "ltz", - "tokenizer": { - "name": "luxembourgish", - "tokenizer": "SpaCyTokenizer(\"lb\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4023", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4022", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4018", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4012", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper German", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Franconian", "iso_1_code": null, "iso_3_code": "vmf", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4025", + "scripts": [], + "own_tokenizer": false }, { "name": "Alemannic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "German, Colonia Tovar", "iso_1_code": null, "iso_3_code": "gct", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4027", + "scripts": [], + "own_tokenizer": false }, { "name": "German, Swiss", "iso_1_code": null, "iso_3_code": "gsw", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4028", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Swabian", "iso_1_code": null, "iso_3_code": "swg", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4029", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Walser", "iso_1_code": null, "iso_3_code": "wae", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4030", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4026", + "scripts": [], + "own_tokenizer": false }, { "name": "Bavarian-Austrian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bavarian", "iso_1_code": null, "iso_3_code": "bar", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4032", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cimbrian", "iso_1_code": null, "iso_3_code": "cim", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4033", + "scripts": [], + "own_tokenizer": false }, { "name": "Hutterisch", "iso_1_code": null, "iso_3_code": "geh", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4034", + "scripts": [], + "own_tokenizer": false }, { "name": "M\u00f2cheno", "iso_1_code": null, "iso_3_code": "mhn", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4035", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4031", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4024", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4010", + "scripts": [], + "own_tokenizer": false }, { "name": "Yiddish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yiddish, Eastern", "iso_1_code": "yi", "iso_3_code": "ydd", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4037", + "scripts": [ + "Hebr" + ], + "own_tokenizer": false }, { "name": "Yiddish, Western", "iso_1_code": "yi", "iso_3_code": "yih", - "tokenizer": { - "name": "german", - "tokenizer": "SpaCyTokenizer(\"de\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4036", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4009", + "scripts": [], + "own_tokenizer": false }, { "name": "Low Saxon-Low Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Low Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Afrikaans", "iso_1_code": "af", "iso_3_code": "afr", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"af\")", + "original_lang_name": "afrikaans", + "original_lang_code": "afr", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4041", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Limburgish", "iso_1_code": "li", "iso_3_code": "lim", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4042", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dutch", "iso_1_code": "nl", "iso_3_code": "nld", - "tokenizer": { - "name": "dutch", - "tokenizer": "SpaCyTokenizer(\"nl\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4043", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "West Flemish", "iso_1_code": null, "iso_3_code": "vls", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4044", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zeeuws", "iso_1_code": null, "iso_3_code": "zea", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4045", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4040", + "scripts": [], + "own_tokenizer": false }, { "name": "Low Saxon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Achterhoeks", "iso_1_code": null, "iso_3_code": "act", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4047", + "scripts": [], + "own_tokenizer": false }, { "name": "Drents", "iso_1_code": null, "iso_3_code": "drt", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4048", + "scripts": [], + "own_tokenizer": false }, { "name": "Saxon, East Frisian Low", "iso_1_code": null, "iso_3_code": "frs", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4049", + "scripts": [], + "own_tokenizer": false }, { "name": "Gronings", "iso_1_code": null, "iso_3_code": "gos", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4050", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Saxon, Low", "iso_1_code": null, "iso_3_code": "nds", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4051", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Plautdietsch", "iso_1_code": null, "iso_3_code": "pdt", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4052", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sallands", "iso_1_code": null, "iso_3_code": "sdz", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4053", + "scripts": [], + "own_tokenizer": false }, { "name": "Stellingwerfs", "iso_1_code": null, "iso_3_code": "stl", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4054", + "scripts": [], + "own_tokenizer": false }, { "name": "Twents", "iso_1_code": null, "iso_3_code": "twd", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4055", + "scripts": [], + "own_tokenizer": false }, { "name": "Veluws", "iso_1_code": null, "iso_3_code": "vel", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4056", + "scripts": [], + "own_tokenizer": false }, { "name": "Westphalien", "iso_1_code": null, "iso_3_code": "wep", - "tokenizer": { - "name": "afrikaans", - "tokenizer": "SpaCyTokenizer(\"af\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4057", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4046", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4039", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4000", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3984", + "scripts": [], + "own_tokenizer": false }, { "name": "Greek", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Attic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Cappadocian Greek", "iso_1_code": null, "iso_3_code": "cpg", - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4060", + "scripts": [], + "own_tokenizer": false }, { "name": "Greek", "iso_1_code": "el", "iso_3_code": "ell", - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4061", + "scripts": [ + "Grek" + ], + "own_tokenizer": true }, { "name": "Greek, Ancient", "iso_1_code": null, "iso_3_code": "grc", - "tokenizer": { - "name": "ancient_greek", - "tokenizer": "SpaCyTokenizer(\"grc\")" + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"grc\")", + "original_lang_name": "ancient_greek", + "original_lang_code": "grc", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4062", + "scripts": [ + "Grek" + ], + "own_tokenizer": true }, { "name": "Pontic", "iso_1_code": null, "iso_3_code": "pnt", - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4063", + "scripts": [ + "Grek" + ], + "own_tokenizer": false }, { "name": "Yevanic", "iso_1_code": null, "iso_3_code": "yej", - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4064", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "4059", + "scripts": [], + "own_tokenizer": false + }, + { + "name": "Doric", + "iso_1_code": null, + "iso_3_code": null, + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false } - ] - }, - { - "name": "Doric", - "iso_1_code": null, - "iso_3_code": null, - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" }, - "source": "down", "children": [ { "name": "Tsakonian", "iso_1_code": null, "iso_3_code": "tsd", - "tokenizer": { - "name": "greek", - "tokenizer": "SpaCyTokenizer(\"el\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4066", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4065", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4058", + "scripts": [], + "own_tokenizer": false }, { "name": "Indo-Iranian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Indo-Aryan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Sanskrit", "iso_1_code": "sa", "iso_3_code": "san", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sa\")", + "original_lang_name": "sanskrit", + "original_lang_code": "san", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"sa\")", + "original_lang_name": "sanskrit", + "original_lang_code": "san", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4069", + "scripts": [ + "Deva", + "Latn" + ], + "own_tokenizer": true }, { "name": "Intermediate Divisions", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "East Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awadhi", "iso_1_code": null, "iso_3_code": "awa", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4073", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Bagheli", "iso_1_code": null, "iso_3_code": "bfy", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4074", + "scripts": [], + "own_tokenizer": false }, { "name": "Fiji Hindi", "iso_1_code": null, "iso_3_code": "hif", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4075", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chhattisgarhi", "iso_1_code": null, "iso_3_code": "hne", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4076", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Kamar", "iso_1_code": null, "iso_3_code": "keq", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4077", + "scripts": [], + "own_tokenizer": false }, { "name": "Surgujia", "iso_1_code": null, "iso_3_code": "sgj", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4078", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4072", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dotyali", "iso_1_code": "ne", "iso_3_code": "dty", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4080", + "scripts": [ + "Deva" + ], + "own_tokenizer": true }, { "name": "Jumli", "iso_1_code": null, "iso_3_code": "jml", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4081", + "scripts": [], + "own_tokenizer": false }, { "name": "Nepali", "iso_1_code": "ne", "iso_3_code": "npi", - "tokenizer": { - "name": "nepali", - "tokenizer": "SpaCyTokenizer(\"ne\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4082", + "scripts": [ + "Latn", + "Deva" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4071", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dawoodi", "iso_1_code": null, "iso_3_code": "dmk", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4084", + "scripts": [], + "own_tokenizer": false }, { "name": "Parya", "iso_1_code": null, "iso_3_code": "paq", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4085", + "scripts": [], + "own_tokenizer": false }, { "name": "Powari", "iso_1_code": null, "iso_3_code": "pwr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4086", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bareli, Pauri", "iso_1_code": null, "iso_3_code": "bfb", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4088", + "scripts": [], + "own_tokenizer": false }, { "name": "Bareli, Rathwi", "iso_1_code": null, "iso_3_code": "bgd", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4089", + "scripts": [], + "own_tokenizer": false }, { "name": "Bauria", "iso_1_code": null, "iso_3_code": "bge", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4090", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhili", "iso_1_code": null, "iso_3_code": "bhb", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4091", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhilali", "iso_1_code": null, "iso_3_code": "bhi", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4092", + "scripts": [], + "own_tokenizer": false }, { "name": "Bareli, Palya", "iso_1_code": null, "iso_3_code": "bpx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4093", + "scripts": [], + "own_tokenizer": false }, { "name": "Chodri", "iso_1_code": null, "iso_3_code": "cdi", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4094", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhodia", "iso_1_code": null, "iso_3_code": "dho", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4095", + "scripts": [], + "own_tokenizer": false }, { "name": "Dubli", "iso_1_code": null, "iso_3_code": "dub", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4096", + "scripts": [], + "own_tokenizer": false }, { "name": "Dungra Bhil", "iso_1_code": null, "iso_3_code": "duh", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4097", + "scripts": [], + "own_tokenizer": false }, { "name": "Garasia, Adiwasi", "iso_1_code": null, "iso_3_code": "gas", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4098", + "scripts": [], + "own_tokenizer": false }, { "name": "Gamit", "iso_1_code": null, "iso_3_code": "gbl", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4099", + "scripts": [], + "own_tokenizer": false }, { "name": "Garasia, Rajput", "iso_1_code": null, "iso_3_code": "gra", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4100", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawchi", "iso_1_code": null, "iso_3_code": "mke", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4101", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahali", "iso_1_code": null, "iso_3_code": "nlx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4102", + "scripts": [], + "own_tokenizer": false }, { "name": "Noiri", "iso_1_code": null, "iso_3_code": "noi", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4103", + "scripts": [], + "own_tokenizer": false }, { "name": "Pardhi", "iso_1_code": null, "iso_3_code": "pcl", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4104", + "scripts": [], + "own_tokenizer": false }, { "name": "Rathawi", "iso_1_code": null, "iso_3_code": "rtw", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4105", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagdi", "iso_1_code": null, "iso_3_code": "wbr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4106", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4087", + "scripts": [], + "own_tokenizer": false }, { "name": "Dom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Domari", "iso_1_code": null, "iso_3_code": "rmt", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4108", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4107", + "scripts": [], + "own_tokenizer": false }, { "name": "Gujarati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Aer", "iso_1_code": null, "iso_3_code": "aeq", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4110", + "scripts": [], + "own_tokenizer": false }, { "name": "Koli, Kachi", "iso_1_code": null, "iso_3_code": "gjk", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4111", + "scripts": [], + "own_tokenizer": false }, { "name": "Gujarati", "iso_1_code": "gu", "iso_3_code": "guj", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4112", + "scripts": [ + "Gujr", + "Latn" + ], + "own_tokenizer": true }, { "name": "Jandavra", "iso_1_code": null, "iso_3_code": "jnd", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4113", + "scripts": [], + "own_tokenizer": false }, { "name": "Koli, Parkari", "iso_1_code": null, "iso_3_code": "kvx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4114", + "scripts": [], + "own_tokenizer": false }, { "name": "Koli, Wadiyari", "iso_1_code": null, "iso_3_code": "kxp", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4115", + "scripts": [], + "own_tokenizer": false }, { "name": "Sourashtra", "iso_1_code": null, "iso_3_code": "saz", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4116", + "scripts": [], + "own_tokenizer": false }, { "name": "Vasavi", "iso_1_code": null, "iso_3_code": "vas", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4117", + "scripts": [], + "own_tokenizer": false }, { "name": "Vaghri", "iso_1_code": null, "iso_3_code": "vgr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4118", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4109", + "scripts": [], + "own_tokenizer": false }, { "name": "Khandesi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ahirani", "iso_1_code": null, "iso_3_code": "ahr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4120", + "scripts": [], + "own_tokenizer": false }, { "name": "Dangi", "iso_1_code": null, "iso_3_code": "dhn", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4121", + "scripts": [], + "own_tokenizer": false }, { "name": "Khandesi", "iso_1_code": null, "iso_3_code": "khn", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4122", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4119", + "scripts": [], + "own_tokenizer": false }, { "name": "Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kumaoni", "iso_1_code": null, "iso_3_code": "kfy", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4125", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4124", + "scripts": [], + "own_tokenizer": false }, { "name": "Garhwali", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Garhwali", "iso_1_code": null, "iso_3_code": "gbm", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4127", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4126", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pahari, Mahasu", "iso_1_code": null, "iso_3_code": "bfz", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4129", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhadrawahi", "iso_1_code": null, "iso_3_code": "bhd", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4130", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhattiyali", "iso_1_code": null, "iso_3_code": "bht", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4131", + "scripts": [], + "own_tokenizer": false }, { "name": "Chambeali", "iso_1_code": null, "iso_3_code": "cdh", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4132", + "scripts": [], + "own_tokenizer": false }, { "name": "Churahi", "iso_1_code": null, "iso_3_code": "cdj", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4133", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogri", "iso_1_code": null, "iso_3_code": "dgo", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4134", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaddi", "iso_1_code": null, "iso_3_code": "gbk", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4135", + "scripts": [], + "own_tokenizer": false }, { "name": "Hinduri", "iso_1_code": null, "iso_3_code": "hii", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4136", + "scripts": [], + "own_tokenizer": false }, { "name": "Khah", "iso_1_code": null, "iso_3_code": "hkh", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4137", + "scripts": [], + "own_tokenizer": false }, { "name": "Jaunsari", "iso_1_code": null, "iso_3_code": "jns", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4138", + "scripts": [], + "own_tokenizer": false }, { "name": "Bilaspuri", "iso_1_code": null, "iso_3_code": "kfs", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4139", + "scripts": [], + "own_tokenizer": false }, { "name": "Pahari, Kullu", "iso_1_code": null, "iso_3_code": "kfx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4140", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinnauri, Pahari", "iso_1_code": null, "iso_3_code": "kjo", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4141", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandeali", "iso_1_code": null, "iso_3_code": "mjl", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4142", + "scripts": [], + "own_tokenizer": false }, { "name": "Pangwali", "iso_1_code": null, "iso_3_code": "pgg", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4143", + "scripts": [], + "own_tokenizer": false }, { "name": "Sirmauri", "iso_1_code": null, "iso_3_code": "srx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4144", + "scripts": [], + "own_tokenizer": false }, { "name": "Kangri", "iso_1_code": null, "iso_3_code": "xnr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4145", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4128", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4123", + "scripts": [], + "own_tokenizer": false }, { "name": "Panjabi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Punjabi, Eastern", "iso_1_code": "pa", "iso_3_code": "pan", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4147", + "scripts": [ + "Latn", + "Guru" + ], + "own_tokenizer": true }, { "name": "Western Panjabi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hindko, Southern", "iso_1_code": null, "iso_3_code": "hnd", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4149", + "scripts": [], + "own_tokenizer": false }, { "name": "Hindko, Northern", "iso_1_code": null, "iso_3_code": "hno", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4150", + "scripts": [], + "own_tokenizer": false }, { "name": "Inku", "iso_1_code": null, "iso_3_code": "jat", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4151", + "scripts": [], + "own_tokenizer": false }, { "name": "Pahari-Potwari", "iso_1_code": null, "iso_3_code": "phr", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4152", + "scripts": [], + "own_tokenizer": false }, { "name": "Punjabi, Western", "iso_1_code": null, "iso_3_code": "pnb", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4153", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Saraiki", "iso_1_code": null, "iso_3_code": "skr", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4154", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Khetrani", "iso_1_code": null, "iso_3_code": "xhe", - "tokenizer": { - "name": "punjabi", - "tokenizer": "IndicNLPTokenizer(\"pa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4146", + "scripts": [], + "own_tokenizer": false }, { "name": "Rajasthani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gujari", "iso_1_code": null, "iso_3_code": "gju", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4157", + "scripts": [], + "own_tokenizer": false }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dhundari", "iso_1_code": null, "iso_3_code": "dhd", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4159", + "scripts": [], + "own_tokenizer": false }, { "name": "Godwari", "iso_1_code": null, "iso_3_code": "gdx", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4160", + "scripts": [], + "own_tokenizer": false }, { "name": "Goaria", "iso_1_code": null, "iso_3_code": "gig", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4161", + "scripts": [], + "own_tokenizer": false }, { "name": "Jogi", "iso_1_code": null, "iso_3_code": "jog", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4162", + "scripts": [], + "own_tokenizer": false }, { "name": "Loarki", "iso_1_code": null, "iso_3_code": "lrk", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4163", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhatki", "iso_1_code": null, "iso_3_code": "mki", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4164", + "scripts": [], + "own_tokenizer": false }, { "name": "Mewari", "iso_1_code": null, "iso_3_code": "mtr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4165", + "scripts": [], + "own_tokenizer": false }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": "mve", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4166", + "scripts": [], + "own_tokenizer": false }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": "rwr", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4167", + "scripts": [], + "own_tokenizer": false }, { "name": "Shekhawati", "iso_1_code": null, "iso_3_code": "swv", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4168", + "scripts": [], + "own_tokenizer": false }, { "name": "Merwari", "iso_1_code": null, "iso_3_code": "wry", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4158", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bagri", "iso_1_code": null, "iso_3_code": "bgq", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4171", + "scripts": [], + "own_tokenizer": false }, { "name": "Lohar, Gade", "iso_1_code": null, "iso_3_code": "gda", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4172", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurgula", "iso_1_code": null, "iso_3_code": "ggg", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4173", + "scripts": [], + "own_tokenizer": false }, { "name": "Haroti", "iso_1_code": null, "iso_3_code": "hoj", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4174", + "scripts": [], + "own_tokenizer": false }, { "name": "Lambadi", "iso_1_code": null, "iso_3_code": "lmn", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4175", + "scripts": [], + "own_tokenizer": false }, { "name": "Malvi", "iso_1_code": null, "iso_3_code": "mup", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4176", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Nimadi", "iso_1_code": null, "iso_3_code": "noe", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4177", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4170", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4156", + "scripts": [], + "own_tokenizer": false }, { "name": "Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Balkan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Romani, Balkan", "iso_1_code": null, "iso_3_code": "rmn", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4180", + "scripts": [ + "Latn", + "Cyrl", + "Grek" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4179", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Romani, Carpathian", "iso_1_code": null, "iso_3_code": "rmc", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4182", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romani, Kalo Finnish", "iso_1_code": null, "iso_3_code": "rmf", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4183", + "scripts": [], + "own_tokenizer": false }, { "name": "Romani, Baltic", "iso_1_code": null, "iso_3_code": "rml", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4184", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romani, Sinte", "iso_1_code": null, "iso_3_code": "rmo", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4185", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romani, Welsh", "iso_1_code": null, "iso_3_code": "rmw", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4186", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4181", + "scripts": [], + "own_tokenizer": false }, { "name": "Vlax", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Romani, Vlax", "iso_1_code": null, "iso_3_code": "rmy", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4188", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4187", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4178", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"ne\")", + "original_lang_name": "nepali", + "original_lang_code": "nep", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sonha", "iso_1_code": null, "iso_3_code": "soi", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4190", + "scripts": [], + "own_tokenizer": false }, { "name": "Mewati", "iso_1_code": null, "iso_3_code": "wtm", - "tokenizer": { - "name": "gujarati", - "tokenizer": "SpaCyTokenizer(\"gu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4191", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4189", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4083", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4070", + "scripts": [], + "own_tokenizer": false }, { "name": "Outer Languages", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bengali-Assamese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Assamese", "iso_1_code": "as", "iso_3_code": "asm", - "tokenizer": { - "name": "assamese", - "tokenizer": "IndicNLPTokenizer(\"as\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"as\")", + "original_lang_name": "assamese", + "original_lang_code": "asm", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "IndicNLPTokenizer(\"as\")", + "original_lang_name": "assamese", + "original_lang_code": "asm", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4195", + "scripts": [ + "Beng", + "Latn" + ], + "own_tokenizer": true }, { "name": "Bengali", "iso_1_code": "bn", "iso_3_code": "ben", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4196", + "scripts": [ + "Beng", + "Latn" + ], + "own_tokenizer": true }, { "name": "Bishnupuriya", "iso_1_code": null, "iso_3_code": "bpy", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4197", + "scripts": [ + "Beng" + ], + "own_tokenizer": false }, { "name": "Chakma", "iso_1_code": null, "iso_3_code": "ccp", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4198", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chittagonian", "iso_1_code": null, "iso_3_code": "ctg", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4199", + "scripts": [], + "own_tokenizer": false }, { "name": "Hajong", "iso_1_code": null, "iso_3_code": "haj", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4200", + "scripts": [], + "own_tokenizer": false }, { "name": "Halbi", "iso_1_code": null, "iso_3_code": "hlb", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4201", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurmukar", "iso_1_code": null, "iso_3_code": "kfv", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4202", + "scripts": [], + "own_tokenizer": false }, { "name": "Kharia Thar", "iso_1_code": null, "iso_3_code": "ksy", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4203", + "scripts": [], + "own_tokenizer": false }, { "name": "Kewat", "iso_1_code": null, "iso_3_code": "kyv", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4204", + "scripts": [], + "own_tokenizer": false }, { "name": "Lodhi", "iso_1_code": null, "iso_3_code": "lbm", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4205", + "scripts": [], + "own_tokenizer": false }, { "name": "Mal Paharia", "iso_1_code": null, "iso_3_code": "mkb", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4206", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahari", "iso_1_code": null, "iso_3_code": "nhh", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4207", + "scripts": [], + "own_tokenizer": false }, { "name": "Rohingya", "iso_1_code": null, "iso_3_code": "rhg", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4208", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Rajbanshi", "iso_1_code": null, "iso_3_code": "rjs", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4209", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Rangpuri", "iso_1_code": null, "iso_3_code": "rkt", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4210", + "scripts": [], + "own_tokenizer": false }, { "name": "Sylheti", "iso_1_code": null, "iso_3_code": "syl", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4211", + "scripts": [ + "Latn", + "Beng" + ], + "own_tokenizer": false }, { "name": "Tangchangya", "iso_1_code": null, "iso_3_code": "tnv", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4212", + "scripts": [], + "own_tokenizer": false }, { "name": "Mirgan", "iso_1_code": null, "iso_3_code": "zrg", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4213", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4194", + "scripts": [], + "own_tokenizer": false }, { "name": "Bihari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bhojpuri", "iso_1_code": null, "iso_3_code": "bho", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4215", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Hindustani, Sarnami", "iso_1_code": null, "iso_3_code": "hns", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4216", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kudmali", "iso_1_code": null, "iso_3_code": "kyw", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4217", + "scripts": [], + "own_tokenizer": false }, { "name": "Magahi", "iso_1_code": null, "iso_3_code": "mag", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4218", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Maithili", "iso_1_code": null, "iso_3_code": "mai", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4219", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Majhi", "iso_1_code": null, "iso_3_code": "mjz", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4220", + "scripts": [], + "own_tokenizer": false }, { "name": "Sadri", "iso_1_code": null, "iso_3_code": "sck", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4221", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Sadri, Oraon", "iso_1_code": null, "iso_3_code": "sdr", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4222", + "scripts": [], + "own_tokenizer": false }, { "name": "Surjapuri", "iso_1_code": null, "iso_3_code": "sjp", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4223", + "scripts": [], + "own_tokenizer": false }, { "name": "Musasa", "iso_1_code": null, "iso_3_code": "smm", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4224", + "scripts": [], + "own_tokenizer": false }, { "name": "Panchpargania", "iso_1_code": null, "iso_3_code": "tdb", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4225", + "scripts": [], + "own_tokenizer": false }, { "name": "Bajjika", "iso_1_code": null, "iso_3_code": "vjk", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4226", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4214", + "scripts": [], + "own_tokenizer": false }, { "name": "Oriya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bodo Parja", "iso_1_code": null, "iso_3_code": "bdv", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4228", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhatri", "iso_1_code": null, "iso_3_code": "bgw", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4229", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhunjia", "iso_1_code": null, "iso_3_code": "bhu", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4230", + "scripts": [], + "own_tokenizer": false }, { "name": "Desiya", "iso_1_code": null, "iso_3_code": "dso", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4231", + "scripts": [], + "own_tokenizer": false }, { "name": "Kupia", "iso_1_code": null, "iso_3_code": "key", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4232", + "scripts": [], + "own_tokenizer": false }, { "name": "Oriya, Adivasi", "iso_1_code": null, "iso_3_code": "ort", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4233", + "scripts": [], + "own_tokenizer": false }, { "name": "Odia", "iso_1_code": "or", "iso_3_code": "ory", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4234", + "scripts": [ + "Latn", + "Orya" + ], + "own_tokenizer": true }, { "name": "Reli", "iso_1_code": null, "iso_3_code": "rei", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4235", + "scripts": [], + "own_tokenizer": false }, { "name": "Sambalpuri", "iso_1_code": "or", "iso_3_code": "spv", - "tokenizer": { - "name": "oriya", - "tokenizer": "IndicNLPTokenizer(\"or\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4236", + "scripts": [], + "own_tokenizer": true } - ] + ], + "node_i": "4227", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Angika", "iso_1_code": null, "iso_3_code": "anp", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4238", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Bote", "iso_1_code": null, "iso_3_code": "bmj", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4239", + "scripts": [], + "own_tokenizer": false }, { "name": "Buksa", "iso_1_code": null, "iso_3_code": "tkb", - "tokenizer": { - "name": "bengali", - "tokenizer": "SpaCyTokenizer(\"bn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4193", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dardic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chitral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Khowar", "iso_1_code": null, "iso_3_code": "khw", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4244", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalasha", "iso_1_code": null, "iso_3_code": "kls", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4245", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4243", + "scripts": [], + "own_tokenizer": false }, { "name": "Kashmiri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kashmiri", "iso_1_code": "ks", "iso_3_code": "kas", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4247", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4246", + "scripts": [], + "own_tokenizer": false }, { "name": "Kohistani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bateri", "iso_1_code": null, "iso_3_code": "btv", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4249", + "scripts": [], + "own_tokenizer": false }, { "name": "Chilisso", "iso_1_code": null, "iso_3_code": "clh", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4250", + "scripts": [], + "own_tokenizer": false }, { "name": "Gawri", "iso_1_code": null, "iso_3_code": "gwc", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4251", + "scripts": [], + "own_tokenizer": false }, { "name": "Gowro", "iso_1_code": null, "iso_3_code": "gwf", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4252", + "scripts": [], + "own_tokenizer": false }, { "name": "Kohistani, Indus", "iso_1_code": null, "iso_3_code": "mvy", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4253", + "scripts": [], + "own_tokenizer": false }, { "name": "Mankiyali", "iso_1_code": null, "iso_3_code": "nlm", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4254", + "scripts": [], + "own_tokenizer": false }, { "name": "Tirahi", "iso_1_code": null, "iso_3_code": "tra", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4255", + "scripts": [], + "own_tokenizer": false }, { "name": "Torwali", "iso_1_code": null, "iso_3_code": "trw", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4256", + "scripts": [], + "own_tokenizer": false }, { "name": "Degano", "iso_1_code": null, "iso_3_code": "wsv", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4257", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4248", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dameli", "iso_1_code": null, "iso_3_code": "dml", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4259", + "scripts": [], + "own_tokenizer": false }, { "name": "Gawar-Bati", "iso_1_code": null, "iso_3_code": "gwt", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4260", + "scripts": [], + "own_tokenizer": false }, { "name": "Grangali", "iso_1_code": null, "iso_3_code": "nli", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4261", + "scripts": [], + "own_tokenizer": false }, { "name": "Shumashti", "iso_1_code": null, "iso_3_code": "sts", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4262", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4258", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pashai, Northeast", "iso_1_code": null, "iso_3_code": "aee", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4264", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashai, Northwest", "iso_1_code": null, "iso_3_code": "glh", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4265", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashai, Southwest", "iso_1_code": null, "iso_3_code": "psh", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4266", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashai, Southeast", "iso_1_code": null, "iso_3_code": "psi", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4267", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4263", + "scripts": [], + "own_tokenizer": false }, { "name": "Shina", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Brokskat", "iso_1_code": null, "iso_3_code": "bkk", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4269", + "scripts": [], + "own_tokenizer": false }, { "name": "Palula", "iso_1_code": null, "iso_3_code": "phl", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4270", + "scripts": [], + "own_tokenizer": false }, { "name": "Shina, Kohistani", "iso_1_code": null, "iso_3_code": "plk", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4271", + "scripts": [], + "own_tokenizer": false }, { "name": "Shina", "iso_1_code": null, "iso_3_code": "scl", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4272", + "scripts": [], + "own_tokenizer": false }, { "name": "Savi", "iso_1_code": null, "iso_3_code": "sdg", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4273", + "scripts": [], + "own_tokenizer": false }, { "name": "Kundal Shahi", "iso_1_code": null, "iso_3_code": "shd", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4274", + "scripts": [], + "own_tokenizer": false }, { "name": "Ushojo", "iso_1_code": null, "iso_3_code": "ush", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4275", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalkoti", "iso_1_code": null, "iso_3_code": "xka", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4268", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4242", + "scripts": [], + "own_tokenizer": false }, { "name": "Sindhi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Jadgali", "iso_1_code": null, "iso_3_code": "jdg", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4278", + "scripts": [], + "own_tokenizer": false }, { "name": "Kacchi", "iso_1_code": null, "iso_3_code": "kfr", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4279", + "scripts": [], + "own_tokenizer": false }, { "name": "Lasi", "iso_1_code": null, "iso_3_code": "lss", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4280", + "scripts": [], + "own_tokenizer": false }, { "name": "Luwati", "iso_1_code": null, "iso_3_code": "luv", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4281", + "scripts": [], + "own_tokenizer": false }, { "name": "Sindhi Bhil", "iso_1_code": null, "iso_3_code": "sbn", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4282", + "scripts": [], + "own_tokenizer": false }, { "name": "Sindhi", "iso_1_code": "sd", "iso_3_code": "snd", - "tokenizer": { - "name": "sindhi", - "tokenizer": "IndicNLPTokenizer(\"sd\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4283", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4277", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4241", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Marathi", "iso_1_code": "mr", "iso_3_code": "mar", - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4285", + "scripts": [ + "Deva", + "Latn" + ], + "own_tokenizer": true }, { "name": "Konkani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Konkani, Goan", "iso_1_code": null, "iso_3_code": "gom", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + } }, - "source": null, - "children": [] + "children": [], + "node_i": "4287", + "scripts": [ + "Latn", + "Deva" + ], + "own_tokenizer": true }, { "name": "Kukna", "iso_1_code": null, "iso_3_code": "kex", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4288", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Katkari", "iso_1_code": null, "iso_3_code": "kfu", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4289", + "scripts": [], + "own_tokenizer": false }, { "name": "Konkani", "iso_1_code": null, "iso_3_code": "knn", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" + "tokenizers": { + "Latn": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"kok\")", + "original_lang_name": "konkani", + "original_lang_code": "kok", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + } }, - "source": null, - "children": [] + "children": [], + "node_i": "4290", + "scripts": [], + "own_tokenizer": true }, { "name": "Phudagi", "iso_1_code": null, "iso_3_code": "phd", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4291", + "scripts": [], + "own_tokenizer": false }, { "name": "Samvedi", "iso_1_code": null, "iso_3_code": "smv", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4292", + "scripts": [], + "own_tokenizer": false }, { "name": "Varli", "iso_1_code": null, "iso_3_code": "vav", - "tokenizer": { - "name": "konkani", - "tokenizer": "IndicNLPTokenizer(\"kok\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4293", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4286", + "scripts": [], + "own_tokenizer": false }, { "name": "Sinhalese-Maldivian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sinhala", - "tokenizer": "SpaCyTokenizer(\"si\")" + "tokenizers": { + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Maldivian", "iso_1_code": "dv", "iso_3_code": "div", - "tokenizer": { - "name": "sinhala", - "tokenizer": "SpaCyTokenizer(\"si\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4295", + "scripts": [ + "Thaa" + ], + "own_tokenizer": false }, { "name": "Sinhala", "iso_1_code": "si", "iso_3_code": "sin", - "tokenizer": { - "name": "sinhala", - "tokenizer": "SpaCyTokenizer(\"si\")" + "tokenizers": { + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4296", + "scripts": [ + "Sinh" + ], + "own_tokenizer": true }, { "name": "Veddah", "iso_1_code": null, "iso_3_code": "ved", - "tokenizer": { - "name": "sinhala", - "tokenizer": "SpaCyTokenizer(\"si\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4297", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4294", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"sd\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "scripts": [ + "Latn", + "Arab", + "Deva" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bhalay", "iso_1_code": null, "iso_3_code": "bhx", - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4299", + "scripts": [], + "own_tokenizer": false }, { "name": "Deccan", "iso_1_code": null, "iso_3_code": "dcc", - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4300", + "scripts": [], + "own_tokenizer": false }, { "name": "Gowlan", "iso_1_code": null, "iso_3_code": "goj", - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4301", + "scripts": [], + "own_tokenizer": false }, { "name": "Varhadi-Nagpuri", "iso_1_code": null, "iso_3_code": "vah", - "tokenizer": { - "name": "marathi", - "tokenizer": "SpaCyTokenizer(\"mr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4302", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4298", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4284", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4192", + "scripts": [], + "own_tokenizer": false }, { "name": "Tharu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tharu, Rana", "iso_1_code": null, "iso_3_code": "thr", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4304", + "scripts": [], + "own_tokenizer": false }, { "name": "Tharu, Kathariya", "iso_1_code": null, "iso_3_code": "tkt", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4305", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Tharu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tharu, Central", "iso_1_code": null, "iso_3_code": "the", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4307", + "scripts": [], + "own_tokenizer": false }, { "name": "Tharu, Dangaura", "iso_1_code": null, "iso_3_code": "thl", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4308", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Tharu, Mid-Eastern", "iso_1_code": null, "iso_3_code": "thq", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4309", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4303", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Andh", "iso_1_code": null, "iso_3_code": "anr", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4311", + "scripts": [], + "own_tokenizer": false }, { "name": "Bazigar", "iso_1_code": null, "iso_3_code": "bfr", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4312", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinali", "iso_1_code": null, "iso_3_code": "cih", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4313", + "scripts": [], + "own_tokenizer": false }, { "name": "Danuwar", "iso_1_code": null, "iso_3_code": "dhw", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4314", + "scripts": [], + "own_tokenizer": false }, { "name": "Darai", "iso_1_code": null, "iso_3_code": "dry", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4315", + "scripts": [], + "own_tokenizer": false }, { "name": "Dewas Rai", "iso_1_code": null, "iso_3_code": "dwz", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4316", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanjari", "iso_1_code": null, "iso_3_code": "kft", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4317", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumal", "iso_1_code": null, "iso_3_code": "kra", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4318", + "scripts": [], + "own_tokenizer": false }, { "name": "Lohar, Lahul", "iso_1_code": null, "iso_3_code": "lhl", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4319", + "scripts": [], + "own_tokenizer": false }, { "name": "Memoni", "iso_1_code": null, "iso_3_code": "mby", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4320", + "scripts": [], + "own_tokenizer": false }, { "name": "Oadki", "iso_1_code": null, "iso_3_code": "odk", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4321", + "scripts": [], + "own_tokenizer": false }, { "name": "Pali", "iso_1_code": "pi", "iso_3_code": "pli", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4322", + "scripts": [], + "own_tokenizer": false }, { "name": "Vaagri Booli", "iso_1_code": null, "iso_3_code": "vaa", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4323", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4310", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Hindi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bundeli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bundeli", "iso_1_code": null, "iso_3_code": "bns", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4326", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4325", + "scripts": [], + "own_tokenizer": false }, { "name": "Hindustani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Hindi", "iso_1_code": "hi", "iso_3_code": "hin", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4328", + "scripts": [ + "Latn", + "Deva" + ], + "own_tokenizer": true }, { "name": "Urdu", "iso_1_code": "ur", "iso_3_code": "urd", - "tokenizer": { - "name": "urdu", - "tokenizer": "SpaCyTokenizer(\"ur\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4329", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": true }, { "name": "Sansi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kabutra", "iso_1_code": null, "iso_3_code": "kbu", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4331", + "scripts": [], + "own_tokenizer": false }, { "name": "Sansi", "iso_1_code": null, "iso_3_code": "ssi", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4332", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4330", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4327", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "scripts": [ + "Latn", + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Haryanvi", "iso_1_code": null, "iso_3_code": "bgc", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4334", + "scripts": [], + "own_tokenizer": false }, { "name": "Bhaya", "iso_1_code": null, "iso_3_code": "bhe", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4335", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanauji", "iso_1_code": null, "iso_3_code": "bjj", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4336", + "scripts": [], + "own_tokenizer": false }, { "name": "Braj Bhasha", "iso_1_code": null, "iso_3_code": "bra", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4337", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghera", "iso_1_code": null, "iso_3_code": "ghr", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4338", + "scripts": [], + "own_tokenizer": false }, { "name": "Gowli", "iso_1_code": null, "iso_3_code": "gok", - "tokenizer": { - "name": "hindi", - "tokenizer": "SpaCyTokenizer(\"hi\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4339", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4333", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4068", + "scripts": [], + "own_tokenizer": false }, { "name": "Iranian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Avestan", "iso_1_code": "ae", "iso_3_code": "ave", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4341", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ossetic", "iso_1_code": "os", "iso_3_code": "oss", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4344", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Yagnobi", "iso_1_code": null, "iso_3_code": "yai", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4345", + "scripts": [], + "own_tokenizer": false }, { "name": "Yassic", "iso_1_code": null, "iso_3_code": "ysc", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4346", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4343", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pamir", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ishkashimi", "iso_1_code": null, "iso_3_code": "isk", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4349", + "scripts": [], + "own_tokenizer": false }, { "name": "Munji", "iso_1_code": null, "iso_3_code": "mnj", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4350", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanglechi", "iso_1_code": null, "iso_3_code": "sgy", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4351", + "scripts": [], + "own_tokenizer": false }, { "name": "Wakhi", "iso_1_code": null, "iso_3_code": "wbl", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4352", + "scripts": [], + "own_tokenizer": false }, { "name": "Yadgha", "iso_1_code": null, "iso_3_code": "ydg", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4353", + "scripts": [], + "own_tokenizer": false }, { "name": "Shugni-Yazgulami", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shughni", "iso_1_code": null, "iso_3_code": "sgh", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4355", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Sarikoli", "iso_1_code": null, "iso_3_code": "srh", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4356", + "scripts": [], + "own_tokenizer": false }, { "name": "Yazghulami", "iso_1_code": null, "iso_3_code": "yah", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4357", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4348", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashto", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pashto, Southern", "iso_1_code": "ps", "iso_3_code": "pbt", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4359", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Pashto, Northern", "iso_1_code": "ps", "iso_3_code": "pbu", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4360", + "scripts": [], + "own_tokenizer": false }, { "name": "Pashto, Central", "iso_1_code": "ps", "iso_3_code": "pst", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4361", + "scripts": [], + "own_tokenizer": false }, { "name": "Waneci", "iso_1_code": null, "iso_3_code": "wne", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4362", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4358", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4347", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4342", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Balochi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Balochi, Southern", "iso_1_code": null, "iso_3_code": "bcc", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4366", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Balochi, Western", "iso_1_code": null, "iso_3_code": "bgn", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4367", + "scripts": [], + "own_tokenizer": false }, { "name": "Balochi, Eastern", "iso_1_code": null, "iso_3_code": "bgp", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4368", + "scripts": [], + "own_tokenizer": false }, { "name": "Bashkardi", "iso_1_code": null, "iso_3_code": "bsg", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4369", + "scripts": [], + "own_tokenizer": false }, { "name": "Koroshi", "iso_1_code": null, "iso_3_code": "ktl", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4370", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4365", + "scripts": [], + "own_tokenizer": false }, { "name": "Caspian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gilaki", "iso_1_code": null, "iso_3_code": "glk", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4372", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Mazandarani", "iso_1_code": null, "iso_3_code": "mzn", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4373", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Shahmirzadi", "iso_1_code": null, "iso_3_code": "srz", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4374", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4371", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Iran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ashtiani", "iso_1_code": null, "iso_3_code": "atn", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4376", + "scripts": [], + "own_tokenizer": false }, { "name": "Dari, Zoroastrian", "iso_1_code": null, "iso_3_code": "gbz", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4377", + "scripts": [], + "own_tokenizer": false }, { "name": "Gazi", "iso_1_code": null, "iso_3_code": "gzi", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4378", + "scripts": [], + "own_tokenizer": false }, { "name": "Khunsari", "iso_1_code": null, "iso_3_code": "kfm", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4379", + "scripts": [], + "own_tokenizer": false }, { "name": "Natanzi", "iso_1_code": null, "iso_3_code": "ntz", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4380", + "scripts": [], + "own_tokenizer": false }, { "name": "Nayini", "iso_1_code": null, "iso_3_code": "nyq", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4381", + "scripts": [], + "own_tokenizer": false }, { "name": "Parsi-Dari", "iso_1_code": null, "iso_3_code": "prd", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4382", + "scripts": [], + "own_tokenizer": false }, { "name": "Sivandi", "iso_1_code": null, "iso_3_code": "siy", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4383", + "scripts": [], + "own_tokenizer": false }, { "name": "Soi", "iso_1_code": null, "iso_3_code": "soj", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4384", + "scripts": [], + "own_tokenizer": false }, { "name": "Vafsi", "iso_1_code": null, "iso_3_code": "vaf", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4385", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4375", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurdish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kurdish, Central", "iso_1_code": "ku", "iso_3_code": "ckb", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4387", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Kurdish, Northern", "iso_1_code": "ku", "iso_3_code": "kmr", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4388", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Laki", "iso_1_code": null, "iso_3_code": "lki", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4389", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Kurdish, Southern", "iso_1_code": "ku", "iso_3_code": "sdh", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4390", + "scripts": [ + "Arab" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4386", + "scripts": [], + "own_tokenizer": false }, { "name": "Ormuri-Parachi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ormuri", "iso_1_code": null, "iso_3_code": "oru", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4392", + "scripts": [], + "own_tokenizer": false }, { "name": "Parachi", "iso_1_code": null, "iso_3_code": "prc", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4391", + "scripts": [], + "own_tokenizer": false }, { "name": "Semnani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lasgerdi", "iso_1_code": null, "iso_3_code": "lsa", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4395", + "scripts": [], + "own_tokenizer": false }, { "name": "Sangisari", "iso_1_code": null, "iso_3_code": "sgr", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4396", + "scripts": [], + "own_tokenizer": false }, { "name": "Semnani", "iso_1_code": null, "iso_3_code": "smy", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4397", + "scripts": [], + "own_tokenizer": false }, { "name": "Sorkhei", "iso_1_code": null, "iso_3_code": "sqo", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4394", + "scripts": [], + "own_tokenizer": false }, { "name": "Talysh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Alviri-Vidari", "iso_1_code": null, "iso_3_code": "avd", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4400", + "scripts": [], + "own_tokenizer": false }, { "name": "Eshtehardi", "iso_1_code": null, "iso_3_code": "esh", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4401", + "scripts": [], + "own_tokenizer": false }, { "name": "Gozarkhani", "iso_1_code": null, "iso_3_code": "goz", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4402", + "scripts": [], + "own_tokenizer": false }, { "name": "Harzani", "iso_1_code": null, "iso_3_code": "hrz", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4403", + "scripts": [], + "own_tokenizer": false }, { "name": "Karingani", "iso_1_code": null, "iso_3_code": "kgn", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4404", + "scripts": [], + "own_tokenizer": false }, { "name": "Koresh-e Rostam", "iso_1_code": null, "iso_3_code": "okh", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4405", + "scripts": [], + "own_tokenizer": false }, { "name": "Razajerdi", "iso_1_code": null, "iso_3_code": "rat", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4406", + "scripts": [], + "own_tokenizer": false }, { "name": "Rudbari", "iso_1_code": null, "iso_3_code": "rdb", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4407", + "scripts": [], + "own_tokenizer": false }, { "name": "Shahrudi", "iso_1_code": null, "iso_3_code": "shm", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4408", + "scripts": [], + "own_tokenizer": false }, { "name": "Takestani", "iso_1_code": null, "iso_3_code": "tks", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4409", + "scripts": [], + "own_tokenizer": false }, { "name": "Talysh", "iso_1_code": null, "iso_3_code": "tly", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4410", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Taromi, Upper", "iso_1_code": null, "iso_3_code": "tov", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4411", + "scripts": [], + "own_tokenizer": false }, { "name": "Maraghei", "iso_1_code": null, "iso_3_code": "vmh", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4412", + "scripts": [], + "own_tokenizer": false }, { "name": "Kho\u2019ini", "iso_1_code": null, "iso_3_code": "xkc", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4413", + "scripts": [], + "own_tokenizer": false }, { "name": "Kajali", "iso_1_code": null, "iso_3_code": "xkj", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4414", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabatei", "iso_1_code": null, "iso_3_code": "xkp", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4415", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4399", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dezfuli", "iso_1_code": null, "iso_3_code": "def", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4417", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4416", + "scripts": [], + "own_tokenizer": false }, { "name": "Zaza-Gorani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bajelani", "iso_1_code": null, "iso_3_code": "bjm", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4419", + "scripts": [], + "own_tokenizer": false }, { "name": "Zazaki, Southern", "iso_1_code": null, "iso_3_code": "diq", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4420", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gurani", "iso_1_code": null, "iso_3_code": "hac", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4421", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Zazaki, Northern", "iso_1_code": null, "iso_3_code": "kiu", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4422", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Shabak", "iso_1_code": null, "iso_3_code": "sdb", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4423", + "scripts": [], + "own_tokenizer": false }, { "name": "Sarli", "iso_1_code": null, "iso_3_code": "sdf", - "tokenizer": { - "name": "northern_kurdish", - "tokenizer": "StanzaTokenizer(\"kmr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4424", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4364", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Fars", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fars, Southwestern", "iso_1_code": null, "iso_3_code": "fay", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4427", + "scripts": [], + "own_tokenizer": false }, { "name": "Lari", "iso_1_code": null, "iso_3_code": "lrl", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4428", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4426", + "scripts": [], + "own_tokenizer": false }, { "name": "Luri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bakhti\u00e2ri", "iso_1_code": null, "iso_3_code": "bqi", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4430", + "scripts": [], + "own_tokenizer": false }, { "name": "Luri, Northern", "iso_1_code": null, "iso_3_code": "lrc", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4431", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Luri, Southern", "iso_1_code": null, "iso_3_code": "luz", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4432", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumzari", "iso_1_code": null, "iso_3_code": "zum", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4433", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4429", + "scripts": [], + "own_tokenizer": false }, { "name": "Persian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Aimaq", "iso_1_code": null, "iso_3_code": "aiq", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4435", + "scripts": [], + "own_tokenizer": false }, { "name": "Bukharic", "iso_1_code": null, "iso_3_code": "bhh", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4436", + "scripts": [], + "own_tokenizer": false }, { "name": "Dehwari", "iso_1_code": null, "iso_3_code": "deh", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4437", + "scripts": [], + "own_tokenizer": false }, { "name": "Hazaragi", "iso_1_code": null, "iso_3_code": "haz", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4438", + "scripts": [], + "own_tokenizer": false }, { "name": "Dzhidi", "iso_1_code": null, "iso_3_code": "jpr", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4439", + "scripts": [], + "own_tokenizer": false }, { "name": "Persian, Iranian", "iso_1_code": "fa", "iso_3_code": "pes", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4440", + "scripts": [], + "own_tokenizer": true }, { "name": "Pahlavani", "iso_1_code": null, "iso_3_code": "phv", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4441", + "scripts": [], + "own_tokenizer": false }, { "name": "Dari", "iso_1_code": "fa", "iso_3_code": "prs", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "4442", + "scripts": [], + "own_tokenizer": true }, { "name": "Tajik", "iso_1_code": "tg", "iso_3_code": "tgk", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4443", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4434", + "scripts": [], + "own_tokenizer": false }, { "name": "Tat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Judeo-Tat", "iso_1_code": null, "iso_3_code": "jdt", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4445", + "scripts": [], + "own_tokenizer": false }, { "name": "Tat, Muslim", "iso_1_code": null, "iso_3_code": "ttt", - "tokenizer": { - "name": "persian", - "tokenizer": "SpaCyTokenizer(\"fa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4446", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4444", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4425", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4363", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4340", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuristani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ashkun", "iso_1_code": null, "iso_3_code": "ask", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4448", + "scripts": [], + "own_tokenizer": false }, { "name": "Kateviri", "iso_1_code": null, "iso_3_code": "bsh", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4449", + "scripts": [], + "own_tokenizer": false }, { "name": "Prasuni", "iso_1_code": null, "iso_3_code": "prn", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4450", + "scripts": [], + "own_tokenizer": false }, { "name": "Tregami", "iso_1_code": null, "iso_3_code": "trm", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4451", + "scripts": [], + "own_tokenizer": false }, { "name": "Waigali", "iso_1_code": null, "iso_3_code": "wbk", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4452", + "scripts": [], + "own_tokenizer": false }, { "name": "Komviri", "iso_1_code": null, "iso_3_code": "xvi", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4453", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4447", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Badeshi", "iso_1_code": null, "iso_3_code": "bdz", - "tokenizer": { - "name": "sanskrit", - "tokenizer": "SpaCyTokenizer(\"sa\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4455", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4454", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4067", + "scripts": [], + "own_tokenizer": false }, { "name": "Italic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "latin", - "tokenizer": "SpaCyTokenizer(\"la\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Latino-Faliscan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "latin", - "tokenizer": "SpaCyTokenizer(\"la\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"la\")", + "original_lang_name": "latin", + "original_lang_code": "lat", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Latin", "iso_1_code": "la", "iso_3_code": "lat", - "tokenizer": { - "name": "latin", - "tokenizer": "SpaCyTokenizer(\"la\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"la\")", + "original_lang_name": "latin", + "original_lang_code": "lat", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4458", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4457", + "scripts": [], + "own_tokenizer": false }, { "name": "Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Romanian", "iso_1_code": "ro", "iso_3_code": "ron", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4461", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Romanian, Istro", "iso_1_code": null, "iso_3_code": "ruo", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4462", + "scripts": [], + "own_tokenizer": false }, { "name": "Aromanian", "iso_1_code": null, "iso_3_code": "rup", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4463", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romanian, Megleno", "iso_1_code": null, "iso_3_code": "ruq", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4464", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4460", + "scripts": [], + "own_tokenizer": false }, { "name": "Italo-Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Italo-Dalmatian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"it\")", + "original_lang_name": "italian", + "original_lang_code": "ita", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dalmatian", "iso_1_code": null, "iso_3_code": "dlm", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4467", + "scripts": [], + "own_tokenizer": false }, { "name": "Istriot", "iso_1_code": null, "iso_3_code": "ist", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4468", + "scripts": [], + "own_tokenizer": false }, { "name": "Italian", "iso_1_code": "it", "iso_3_code": "ita", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"it\")", + "original_lang_name": "italian", + "original_lang_code": "ita", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4469", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Judeo-Italian", "iso_1_code": null, "iso_3_code": "itk", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4470", + "scripts": [], + "own_tokenizer": false }, { "name": "Napoletano", "iso_1_code": null, "iso_3_code": "nap", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"it\")", + "original_lang_name": "italian", + "original_lang_code": "ita", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4471", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sicilian", "iso_1_code": null, "iso_3_code": "scn", - "tokenizer": { - "name": "italian", - "tokenizer": "SpaCyTokenizer(\"it\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"it\")", + "original_lang_name": "italian", + "original_lang_code": "ita", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4472", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "4466", + "scripts": [], + "own_tokenizer": false + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false } - ] - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" }, - "source": "bottom", "children": [ { "name": "Gallo-Iberian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Gallo-Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Gallo-Italian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Emilian", "iso_1_code": null, "iso_3_code": "egl", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4477", + "scripts": [], + "own_tokenizer": false }, { "name": "Ligurian", "iso_1_code": null, "iso_3_code": "lij", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4478", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Lombard", "iso_1_code": null, "iso_3_code": "lmo", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4479", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Piedmontese", "iso_1_code": null, "iso_3_code": "pms", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4480", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romagnol", "iso_1_code": null, "iso_3_code": "rgn", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4481", + "scripts": [], + "own_tokenizer": false }, { "name": "Venetian", "iso_1_code": null, "iso_3_code": "vec", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4482", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4476", + "scripts": [], + "own_tokenizer": false }, { "name": "Gallo-Rhaetian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "O\u00efl", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "French", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "French", "iso_1_code": "fr", "iso_3_code": "fra", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4486", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "French, Cajun", "iso_1_code": null, "iso_3_code": "frc", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4487", + "scripts": [], + "own_tokenizer": false }, { "name": "Guern\u00e9siais", "iso_1_code": null, "iso_3_code": "nrf", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4488", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Picard", "iso_1_code": null, "iso_3_code": "pcd", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4489", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Walloon", "iso_1_code": "wa", "iso_3_code": "wln", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4490", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4485", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arpitan", "iso_1_code": null, "iso_3_code": "frp", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4492", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4491", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4484", + "scripts": [], + "own_tokenizer": false }, { "name": "Rhaetian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Friulian", "iso_1_code": null, "iso_3_code": "fur", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4494", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ladin", "iso_1_code": null, "iso_3_code": "lld", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4495", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Romansh", "iso_1_code": "rm", "iso_3_code": "roh", - "tokenizer": { - "name": "french", - "tokenizer": "SpaCyTokenizer(\"fr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4496", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4483", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4475", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibero-Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "East Iberian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ca\")", + "original_lang_name": "catalan", + "original_lang_code": "cat", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Catalan", "iso_1_code": "ca", "iso_3_code": "cat", - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ca\")", + "original_lang_name": "catalan", + "original_lang_code": "cat", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4499", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4498", + "scripts": [], + "own_tokenizer": false }, { "name": "Oc", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Occitan", "iso_1_code": "oc", "iso_3_code": "oci", - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4501", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Shuadit", "iso_1_code": null, "iso_3_code": "sdt", - "tokenizer": { - "name": "catalan", - "tokenizer": "SpaCyTokenizer(\"ca\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4502", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4500", + "scripts": [], + "own_tokenizer": false }, { "name": "West Iberian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Asturo-Leonese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Asturian", "iso_1_code": null, "iso_3_code": "ast", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4505", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mirandese", "iso_1_code": null, "iso_3_code": "mwl", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4506", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4504", + "scripts": [], + "own_tokenizer": false }, { "name": "Castilian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Extremaduran", "iso_1_code": null, "iso_3_code": "ext", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4508", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ladino", "iso_1_code": null, "iso_3_code": "lad", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4509", + "scripts": [ + "Latn", + "Hebr" + ], + "own_tokenizer": false }, { "name": "Spanish", "iso_1_code": "es", "iso_3_code": "spa", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4510", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Spanish, Charapa", "iso_1_code": null, "iso_3_code": "spq", - "tokenizer": { - "name": "spanish", - "tokenizer": "SpaCyTokenizer(\"es\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4511", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4507", + "scripts": [], + "own_tokenizer": false }, { "name": "Portuguese-Galician", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "portuguese", - "tokenizer": "SpaCyTokenizer(\"pt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pt\")", + "original_lang_name": "portuguese", + "original_lang_code": "por", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Minderico", "iso_1_code": null, "iso_3_code": "drc", - "tokenizer": { - "name": "portuguese", - "tokenizer": "SpaCyTokenizer(\"pt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4513", + "scripts": [], + "own_tokenizer": false }, { "name": "Fala", "iso_1_code": null, "iso_3_code": "fax", - "tokenizer": { - "name": "portuguese", - "tokenizer": "SpaCyTokenizer(\"pt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4514", + "scripts": [], + "own_tokenizer": false }, { "name": "Galician", "iso_1_code": "gl", "iso_3_code": "glg", - "tokenizer": { - "name": "galician", - "tokenizer": "StanzaTokenizer(\"gl\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"gl\")", + "original_lang_name": "galician", + "original_lang_code": "glg", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4515", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Portuguese", "iso_1_code": "pt", "iso_3_code": "por", - "tokenizer": { - "name": "portuguese", - "tokenizer": "SpaCyTokenizer(\"pt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pt\")", + "original_lang_name": "portuguese", + "original_lang_code": "por", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4516", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4512", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4497", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4474", + "scripts": [], + "own_tokenizer": false }, { "name": "Pyrenean-Mozarabic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pyrenean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aragonese", "iso_1_code": "an", "iso_3_code": "arg", - "tokenizer": { - "name": "ligurian", - "tokenizer": "SpaCyTokenizer(\"lij\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4519", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4518", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4465", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Corsican", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Corsican", "iso_1_code": "co", "iso_3_code": "cos", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4522", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4521", + "scripts": [], + "own_tokenizer": false }, { "name": "Sardinian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "scripts": [ + "Armn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "scripts": [ + "Grek" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Deva": { + "full_object": "SpaCyTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "scripts": [ + "Latn", + "Deva" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Gujr": { + "full_object": "SpaCyTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "scripts": [ + "Latn", + "Gujr" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "scripts": [ + "Latn", + "Guru" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": false + }, + "Beng": { + "full_object": "SpaCyTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "scripts": [ + "Latn", + "Beng" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Orya": { + "full_object": "IndicNLPTokenizer(\"or\")", + "original_lang_name": "oriya", + "original_lang_code": "ori", + "scripts": [ + "Latn", + "Orya" + ], + "class_name": "IndicNLPTokenizer", + "macrolanguage": true + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"fa\")", + "original_lang_name": "persian", + "original_lang_code": "fas", + "scripts": [ + "Arab" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Sinh": { + "full_object": "SpaCyTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "scripts": [ + "Sinh" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sardinian, Sassarese", "iso_1_code": "sc", "iso_3_code": "sdc", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4524", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sardinian, Gallurese", "iso_1_code": "sc", "iso_3_code": "sdn", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4525", + "scripts": [], + "own_tokenizer": false }, { "name": "Sardinian, Logudorese", "iso_1_code": "sc", "iso_3_code": "src", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4526", + "scripts": [], + "own_tokenizer": false }, { "name": "Sardinian, Campidanese", "iso_1_code": "sc", "iso_3_code": "sro", - "tokenizer": { - "name": "romanian", - "tokenizer": "SpaCyTokenizer(\"ro\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4527", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4523", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4520", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4459", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4456", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "3919", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Iroquoian.json b/data/Iroquoian.json index 9f7bbf28c3ae05a5b018cdbdd35f82893e08d22c..4aaa42ca592ca8717360c0fee774b07a30aa74bb 100644 --- a/data/Iroquoian.json +++ b/data/Iroquoian.json @@ -2,175 +2,222 @@ "name": "Iroquoian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cherokee", "iso_1_code": null, "iso_3_code": "chr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4529", + "scripts": [ + "Cher", + "Latn" + ], + "own_tokenizer": false }, { "name": "Northern Iroquoian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Five Nations-Huronian-Susquehannock", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Susquehannock", "iso_1_code": null, "iso_3_code": "sqn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4532", + "scripts": [], + "own_tokenizer": false }, { "name": "Five Nations-Susquehannock", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cayuga", "iso_1_code": null, "iso_3_code": "cay", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4534", + "scripts": [], + "own_tokenizer": false }, { "name": "Onondaga", "iso_1_code": null, "iso_3_code": "ono", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4535", + "scripts": [], + "own_tokenizer": false }, { "name": "Seneca", "iso_1_code": null, "iso_3_code": "see", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4536", + "scripts": [], + "own_tokenizer": false }, { "name": "Mohawk-Oneida", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mohawk", "iso_1_code": null, "iso_3_code": "moh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4538", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Oneida", "iso_1_code": null, "iso_3_code": "one", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4539", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4537", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4533", + "scripts": [], + "own_tokenizer": false }, { "name": "Huronian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Laurentian", "iso_1_code": null, "iso_3_code": "lre", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4541", + "scripts": [], + "own_tokenizer": false }, { "name": "Huron-Petun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wendat", "iso_1_code": null, "iso_3_code": "wdt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4543", + "scripts": [], + "own_tokenizer": false }, { "name": "Wyandot", "iso_1_code": null, "iso_3_code": "wyn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4544", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4542", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4540", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4531", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuscarora-Nottoway", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nottoway", "iso_1_code": null, "iso_3_code": "ntw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4546", + "scripts": [], + "own_tokenizer": false }, { "name": "Nottoway-Meherrin", "iso_1_code": null, "iso_3_code": "nwy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4547", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuscarora", "iso_1_code": null, "iso_3_code": "tus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4548", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4545", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4530", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4528", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Jabutian.json b/data/Jabutian.json index 9cddac547855ef6fdc23c0920eccddbdd1e28484..9d0ab191bfb1493cd3501fb51f13419b711bf51c 100644 --- a/data/Jabutian.json +++ b/data/Jabutian.json @@ -2,24 +2,30 @@ "name": "Jabutian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arikap\u00fa", "iso_1_code": null, "iso_3_code": "ark", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4550", + "scripts": [], + "own_tokenizer": false }, { "name": "Jabut\u00ed", "iso_1_code": null, "iso_3_code": "jbt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4551", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4549", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Japonic.json b/data/Japonic.json index 7389e6908699b5df721ab27bef3e17ec24680969..2a129c93428237c8a45805d16d0f9aa03a23caa4 100644 --- a/data/Japonic.json +++ b/data/Japonic.json @@ -2,203 +2,264 @@ "name": "Japonic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Japanese", "iso_1_code": "ja", "iso_3_code": "jpn", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4553", + "scripts": [ + "Jpan" + ], + "own_tokenizer": true }, { "name": "Ryukyuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northern Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Amami-Oshima, Southern", "iso_1_code": null, "iso_3_code": "ams", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4557", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikai", "iso_1_code": null, "iso_3_code": "kzg", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4558", + "scripts": [], + "own_tokenizer": false }, { "name": "Amami-Oshima, Northern", "iso_1_code": null, "iso_3_code": "ryn", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4559", + "scripts": [], + "own_tokenizer": false }, { "name": "Toku-No-Shima", "iso_1_code": null, "iso_3_code": "tkn", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4560", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4556", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oki-No-Erabu", "iso_1_code": null, "iso_3_code": "okn", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4562", + "scripts": [], + "own_tokenizer": false }, { "name": "Okinawan, Central", "iso_1_code": null, "iso_3_code": "ryu", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4563", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunigami", "iso_1_code": null, "iso_3_code": "xug", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4564", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoron", "iso_1_code": null, "iso_3_code": "yox", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4565", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4555", + "scripts": [], + "own_tokenizer": false }, { "name": "Sakishima", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"), ", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "scripts": [ + "Jpan" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Miyako", "iso_1_code": null, "iso_3_code": "mvi", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4567", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaeyama", "iso_1_code": null, "iso_3_code": "rys", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4568", + "scripts": [], + "own_tokenizer": false }, { "name": "Yonaguni", "iso_1_code": null, "iso_3_code": "yoi", - "tokenizer": { - "name": "japanese", - "tokenizer": "SpaCyTokenizer(\"ja\"), " - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4569", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4566", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4554", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4552", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Jean.json b/data/Jean.json index ddf66de629110f2fe8209bc483492d208f87292f..cbb00769112c1280f9bdf9c1dce6a974b345986e 100644 --- a/data/Jean.json +++ b/data/Jean.json @@ -2,181 +2,233 @@ "name": "Jean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Acro\u00e1", "iso_1_code": null, "iso_3_code": "acs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4572", + "scripts": [], + "own_tokenizer": false }, { "name": "Xav\u00e1nte", "iso_1_code": null, "iso_3_code": "xav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4573", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Xer\u00e9nte", "iso_1_code": null, "iso_3_code": "xer", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4574", + "scripts": [], + "own_tokenizer": false }, { "name": "Xakriab\u00e1", "iso_1_code": null, "iso_3_code": "xkr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4575", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4571", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apinag\u00e9", "iso_1_code": null, "iso_3_code": "apn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4577", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Panar\u00e1", "iso_1_code": null, "iso_3_code": "kre", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4578", + "scripts": [], + "own_tokenizer": false }, { "name": "Suy\u00e1", "iso_1_code": null, "iso_3_code": "suy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4579", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayap\u00f3", "iso_1_code": null, "iso_3_code": "txu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4580", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Timbira", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gavi\u00e3o, Par\u00e1", "iso_1_code": null, "iso_3_code": "gvp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4582", + "scripts": [], + "own_tokenizer": false }, { "name": "Canela", "iso_1_code": null, "iso_3_code": "ram", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4583", + "scripts": [], + "own_tokenizer": false }, { "name": "Krah\u00f4", "iso_1_code": null, "iso_3_code": "xra", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4584", + "scripts": [], + "own_tokenizer": false }, { "name": "Kreye", "iso_1_code": null, "iso_3_code": "xre", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4585", + "scripts": [], + "own_tokenizer": false }, { "name": "Krikati-Timbira", "iso_1_code": null, "iso_3_code": "xri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4586", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4581", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4576", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Xokleng", "iso_1_code": null, "iso_3_code": "xok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4588", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaingang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaingang", "iso_1_code": null, "iso_3_code": "kgp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4590", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaing\u00e1ng, S\u00e3o Paulo", "iso_1_code": null, "iso_3_code": "zkp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4589", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4587", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4570", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Jicaquean.json b/data/Jicaquean.json index a4b993977f8e94dd43b5392074ea090074a6f70a..9a1889dae4093f92e80b4234c8e2a5c58402616b 100644 --- a/data/Jicaquean.json +++ b/data/Jicaquean.json @@ -2,16 +2,22 @@ "name": "Jicaquean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tol", "iso_1_code": null, "iso_3_code": "jic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4593", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4592", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Jivaroan.json b/data/Jivaroan.json index 7916c8de8d6515a65b9a22cce88d7dbb35428b46..f8f06e737b83518a1c60d60cd823df662d947b0b 100644 --- a/data/Jivaroan.json +++ b/data/Jivaroan.json @@ -2,49 +2,69 @@ "name": "Jivaroan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awaj\u00fan", "iso_1_code": null, "iso_3_code": "agr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4595", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "J\u00edvaro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Achuar-Shiwiar", "iso_1_code": null, "iso_3_code": "acu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4597", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wamp\u00eds", "iso_1_code": null, "iso_3_code": "hub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4598", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Shuar", "iso_1_code": null, "iso_3_code": "jiv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4599", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4594", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kamakanan.json b/data/Kamakanan.json index 2d45a7a785e7b439e5e2f0ab84c367f4177d2827..40b1db8617ecc9e6bf4317ee6481801734054a95 100644 --- a/data/Kamakanan.json +++ b/data/Kamakanan.json @@ -2,25 +2,31 @@ "name": "Kamakanan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kamak\u00e1n", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kamakan", "iso_1_code": null, "iso_3_code": "vkm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4602", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4601", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4600", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/Karaj\303\241.json" "b/data/Karaj\303\241.json" index fb7715e76608e824ff6e3ab78587a572d9106c6f..61eed10c6502231af89f15c5cb7838610369d796 100644 --- "a/data/Karaj\303\241.json" +++ "b/data/Karaj\303\241.json" @@ -2,16 +2,22 @@ "name": "Karaj\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karaj\u00e1", "iso_1_code": null, "iso_3_code": "kpj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4604", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4603", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kartvelian.json b/data/Kartvelian.json index 9c43d188def64163b73ab39407c8491035b6786b..0203fa0bfbb6ba40fa3f19d675876ffdcadb3f00 100644 --- a/data/Kartvelian.json +++ b/data/Kartvelian.json @@ -2,75 +2,97 @@ "name": "Kartvelian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Georgian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Judeo-Georgian", "iso_1_code": null, "iso_3_code": "jge", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4607", + "scripts": [], + "own_tokenizer": false }, { "name": "Georgian", "iso_1_code": "ka", "iso_3_code": "kat", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4608", + "scripts": [ + "Geor" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4606", + "scripts": [], + "own_tokenizer": false }, { "name": "Svan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Svan", "iso_1_code": null, "iso_3_code": "sva", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4610", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4609", + "scripts": [], + "own_tokenizer": false }, { "name": "Zan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Laz", "iso_1_code": null, "iso_3_code": "lzz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4612", + "scripts": [], + "own_tokenizer": false }, { "name": "Mingrelian", "iso_1_code": null, "iso_3_code": "xmf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4613", + "scripts": [ + "Geor" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4611", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4605", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Katukinan.json b/data/Katukinan.json index 71d78e9d2350407992b35d224c8093255917a0a8..4edb529f496ea9258d587378e9d5caa595fac028 100644 --- a/data/Katukinan.json +++ b/data/Katukinan.json @@ -2,32 +2,40 @@ "name": "Katukinan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Katuk\u00edna", "iso_1_code": null, "iso_3_code": "kav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4615", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanamar\u00ed", "iso_1_code": null, "iso_3_code": "knm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4616", + "scripts": [], + "own_tokenizer": false }, { "name": "Katawixi", "iso_1_code": null, "iso_3_code": "xat", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4617", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4614", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kaure.json b/data/Kaure.json index c07a9827f2ca55e10eb054f96aec373982155e4b..96b0eef1173301c1014c3082bf86225dc970ccae 100644 --- a/data/Kaure.json +++ b/data/Kaure.json @@ -2,50 +2,62 @@ "name": "Kaure", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kapore", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kapauri", "iso_1_code": null, "iso_3_code": "khp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4620", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4619", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaure Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaure", "iso_1_code": null, "iso_3_code": "bpp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4622", + "scripts": [], + "own_tokenizer": false }, { "name": "Kosare", "iso_1_code": null, "iso_3_code": "kiq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4623", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4621", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4618", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kaweskaran.json b/data/Kaweskaran.json index 53799bb1cdfa5135334d8f45634337247e243f95..0abf44a0878ab88fc7f58ea02556e7b783298764 100644 --- a/data/Kaweskaran.json +++ b/data/Kaweskaran.json @@ -2,16 +2,20 @@ "name": "Kaweskaran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Qawasqar", "iso_1_code": null, "iso_3_code": "alc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4625", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4624", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Keresan.json b/data/Keresan.json index fcb3e9d4afd061147e82dce3eb9e325b15142f64..c96d553e3644c4de7c8df7706084e5dd8a9a66b0 100644 --- a/data/Keresan.json +++ b/data/Keresan.json @@ -2,24 +2,30 @@ "name": "Keresan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Keres, Eastern", "iso_1_code": null, "iso_3_code": "kee", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4627", + "scripts": [], + "own_tokenizer": false }, { "name": "Keres, Western", "iso_1_code": null, "iso_3_code": "kjq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4628", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4626", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Khoe-Kwadi.json b/data/Khoe-Kwadi.json index 73c964088961779d8ff3b446bef562fe1bffd7f9..255d90df65380d453470d45368ced0854e07a07e 100644 --- a/data/Khoe-Kwadi.json +++ b/data/Khoe-Kwadi.json @@ -2,202 +2,254 @@ "name": "Khoe-Kwadi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Khoe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalahari Khoe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Shua", "iso_1_code": null, "iso_3_code": "shg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4633", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4632", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tshuwau", "iso_1_code": null, "iso_3_code": "hio", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4635", + "scripts": [], + "own_tokenizer": false }, { "name": "Kua", "iso_1_code": null, "iso_3_code": "tyu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4636", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4634", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "\u01c1Ani", "iso_1_code": null, "iso_3_code": "hnh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4638", + "scripts": [], + "own_tokenizer": false }, { "name": "Khwedam", "iso_1_code": null, "iso_3_code": "xuu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4639", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4637", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "\u01c1Gana", "iso_1_code": null, "iso_3_code": "gnk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4641", + "scripts": [], + "own_tokenizer": false }, { "name": "\u01c0Gwi", "iso_1_code": null, "iso_3_code": "gwj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4642", + "scripts": [], + "own_tokenizer": false }, { "name": "Naro", "iso_1_code": null, "iso_3_code": "nhr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4643", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4631", + "scripts": [], + "own_tokenizer": false }, { "name": "Khoekhoe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hainum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hai|\u01c1om", "iso_1_code": null, "iso_3_code": "hgm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4646", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4645", + "scripts": [], + "own_tokenizer": false }, { "name": "Nama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Korana", "iso_1_code": null, "iso_3_code": "kqz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4648", + "scripts": [], + "own_tokenizer": false }, { "name": "Khoekhoe", "iso_1_code": null, "iso_3_code": "naq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4649", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Xiri", "iso_1_code": null, "iso_3_code": "xii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4650", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4647", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4644", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4630", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwadi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kwadi", "iso_1_code": null, "iso_3_code": "kwz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4652", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4651", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4629", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kiowa-Tanoan.json b/data/Kiowa-Tanoan.json index 7c58a857cade850803358662905fedc98d118c37..5682dec179bf0cfb864486f1ba17fcfd021527c9 100644 --- a/data/Kiowa-Tanoan.json +++ b/data/Kiowa-Tanoan.json @@ -2,65 +2,83 @@ "name": "Kiowa-Tanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kiowa", "iso_1_code": null, "iso_3_code": "kio", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4654", + "scripts": [], + "own_tokenizer": false }, { "name": "Piro", "iso_1_code": null, "iso_3_code": "pie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4655", + "scripts": [], + "own_tokenizer": false }, { "name": "Tewa", "iso_1_code": null, "iso_3_code": "tew", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4656", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jemez", "iso_1_code": null, "iso_3_code": "tow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4657", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tiwa, Southern", "iso_1_code": null, "iso_3_code": "tix", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4659", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiwa, Northern", "iso_1_code": null, "iso_3_code": "twf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4660", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4658", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4653", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Koreanic.json b/data/Koreanic.json index 1dfe3ac138182323dd9c94b8cfb07665ffd4082b..6cab6780901ed3b01d4e32f169c03676c3248d01 100644 --- a/data/Koreanic.json +++ b/data/Koreanic.json @@ -2,33 +2,54 @@ "name": "Koreanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "korean", - "tokenizer": "KiwiTokenizer()" + "tokenizers": { + "Hang": { + "full_object": "KiwiTokenizer()", + "original_lang_name": "korean", + "original_lang_code": "kor", + "scripts": [ + "Hang" + ], + "class_name": "KiwiTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Jejueo", "iso_1_code": null, "iso_3_code": "jje", - "tokenizer": { - "name": "korean", - "tokenizer": "KiwiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4662", + "scripts": [], + "own_tokenizer": false }, { "name": "Korean", "iso_1_code": "ko", "iso_3_code": "kor", - "tokenizer": { - "name": "korean", - "tokenizer": "KiwiTokenizer()" + "tokenizers": { + "Hang": { + "full_object": "KiwiTokenizer()", + "original_lang_name": "korean", + "original_lang_code": "kor", + "scripts": [ + "Hang" + ], + "class_name": "KiwiTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4663", + "scripts": [ + "Hang" + ], + "own_tokenizer": true } - ] + ], + "node_i": "4661", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kra-Dai.json b/data/Kra-Dai.json index 7da409a90df9a2449051fa9ad0a472cc70ee0c66..c9bbb5ce2e61f44595124e460604dcac1b773ab1 100644 --- a/data/Kra-Dai.json +++ b/data/Kra-Dai.json @@ -2,1156 +2,1229 @@ "name": "Kra-Dai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Nora", "iso_1_code": null, "iso_3_code": "nrr", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4665", + "scripts": [], + "own_tokenizer": false }, { "name": "Hlai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jiamao", "iso_1_code": null, "iso_3_code": "jio", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4667", + "scripts": [], + "own_tokenizer": false }, { "name": "Hlai", "iso_1_code": null, "iso_3_code": "lic", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4666", + "scripts": [], + "own_tokenizer": false }, { "name": "Kam-Tai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Kam-Sui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ai-Cham", "iso_1_code": null, "iso_3_code": "aih", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4671", + "scripts": [], + "own_tokenizer": false }, { "name": "Biao", "iso_1_code": null, "iso_3_code": "byk", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4672", + "scripts": [], + "own_tokenizer": false }, { "name": "Chadong", "iso_1_code": null, "iso_3_code": "cdy", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4673", + "scripts": [], + "own_tokenizer": false }, { "name": "Cao Miao", "iso_1_code": null, "iso_3_code": "cov", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4674", + "scripts": [], + "own_tokenizer": false }, { "name": "Dong, Northern", "iso_1_code": null, "iso_3_code": "doc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4675", + "scripts": [], + "own_tokenizer": false }, { "name": "Dong, Southern", "iso_1_code": null, "iso_3_code": "kmc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4676", + "scripts": [], + "own_tokenizer": false }, { "name": "Kang", "iso_1_code": null, "iso_3_code": "kyp", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4677", + "scripts": [], + "own_tokenizer": false }, { "name": "Mak", "iso_1_code": null, "iso_3_code": "mkg", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4678", + "scripts": [], + "own_tokenizer": false }, { "name": "Mulam", "iso_1_code": null, "iso_3_code": "mlm", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4679", + "scripts": [], + "own_tokenizer": false }, { "name": "Maonan", "iso_1_code": null, "iso_3_code": "mmd", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4680", + "scripts": [], + "own_tokenizer": false }, { "name": "Sui", "iso_1_code": null, "iso_3_code": "swi", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4681", + "scripts": [], + "own_tokenizer": false }, { "name": "T\u2019en", "iso_1_code": null, "iso_3_code": "tct", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4682", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4670", + "scripts": [], + "own_tokenizer": false }, { "name": "Lakkja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lakkia", "iso_1_code": null, "iso_3_code": "lbc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4684", + "scripts": [], + "own_tokenizer": false }, { "name": "Lingao", "iso_1_code": null, "iso_3_code": "onb", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4685", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4683", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Tai Khang", "iso_1_code": null, "iso_3_code": "tnu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4687", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Pao", "iso_1_code": null, "iso_3_code": "tpo", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4688", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Yo", "iso_1_code": null, "iso_3_code": "tyj", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4689", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuan", "iso_1_code": null, "iso_3_code": "uan", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4690", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cao Lan", "iso_1_code": null, "iso_3_code": "mlc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4692", + "scripts": [], + "own_tokenizer": false }, { "name": "Nung", "iso_1_code": null, "iso_3_code": "nut", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4693", + "scripts": [], + "own_tokenizer": false }, { "name": "Ts\u2019\u00fcn-Lao", "iso_1_code": null, "iso_3_code": "tsl", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4694", + "scripts": [], + "own_tokenizer": false }, { "name": "T\u00e0y", "iso_1_code": null, "iso_3_code": "tyz", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4695", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Minz", "iso_1_code": "za", "iso_3_code": "zgm", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4696", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Dai", "iso_1_code": "za", "iso_3_code": "zhd", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4697", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Nong", "iso_1_code": "za", "iso_3_code": "zhn", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4698", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Yang", "iso_1_code": "za", "iso_3_code": "zyg", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4699", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Yongnan", "iso_1_code": "za", "iso_3_code": "zyn", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4700", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Zuojiang", "iso_1_code": "za", "iso_3_code": "zzj", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4701", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4691", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bouyei", "iso_1_code": null, "iso_3_code": "pcc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4703", + "scripts": [], + "own_tokenizer": false }, { "name": "Saek", "iso_1_code": null, "iso_3_code": "skb", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4704", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoy", "iso_1_code": null, "iso_3_code": "yoy", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4705", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Central Hongshuihe", "iso_1_code": "za", "iso_3_code": "zch", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4706", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Eastern Hongshuihe", "iso_1_code": "za", "iso_3_code": "zeh", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4707", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Guibei", "iso_1_code": "za", "iso_3_code": "zgb", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4708", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Guibian", "iso_1_code": "za", "iso_3_code": "zgn", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4709", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Liujiang", "iso_1_code": "za", "iso_3_code": "zlj", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4710", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Lianshan", "iso_1_code": "za", "iso_3_code": "zln", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4711", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Liuqian", "iso_1_code": "za", "iso_3_code": "zlq", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4712", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Qiubei", "iso_1_code": "za", "iso_3_code": "zqe", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4713", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhuang, Yongbei", "iso_1_code": "za", "iso_3_code": "zyb", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4714", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zhuang, Youjiang", "iso_1_code": "za", "iso_3_code": "zyj", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4715", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4702", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ahom", "iso_1_code": null, "iso_3_code": "aho", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4717", + "scripts": [], + "own_tokenizer": false }, { "name": "Aiton", "iso_1_code": null, "iso_3_code": "aio", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4718", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Dam", "iso_1_code": null, "iso_3_code": "blt", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4719", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tai Ya", "iso_1_code": null, "iso_3_code": "cuu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4720", + "scripts": [], + "own_tokenizer": false }, { "name": "L\u00fc", "iso_1_code": null, "iso_3_code": "khb", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4721", + "scripts": [], + "own_tokenizer": false }, { "name": "Khamti", "iso_1_code": null, "iso_3_code": "kht", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4722", + "scripts": [], + "own_tokenizer": false }, { "name": "Kh\u00fcn", "iso_1_code": null, "iso_3_code": "kkh", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4723", + "scripts": [], + "own_tokenizer": false }, { "name": "Khamyang", "iso_1_code": null, "iso_3_code": "ksu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4724", + "scripts": [], + "own_tokenizer": false }, { "name": "Lao", "iso_1_code": "lo", "iso_3_code": "lao", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4725", + "scripts": [ + "Laoo" + ], + "own_tokenizer": false }, { "name": "Thai, Northern", "iso_1_code": null, "iso_3_code": "nod", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "4726", + "scripts": [ + "Thai" + ], + "own_tokenizer": false }, { "name": "Nyaw", "iso_1_code": null, "iso_3_code": "nyw", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4727", + "scripts": [], + "own_tokenizer": false }, { "name": "Pa Di", "iso_1_code": null, "iso_3_code": "pdi", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4728", + "scripts": [], + "own_tokenizer": false }, { "name": "Phake", "iso_1_code": null, "iso_3_code": "phk", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4729", + "scripts": [], + "own_tokenizer": false }, { "name": "Phu Thai", "iso_1_code": null, "iso_3_code": "pht", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4730", + "scripts": [], + "own_tokenizer": false }, { "name": "Phuan", "iso_1_code": null, "iso_3_code": "phu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4731", + "scripts": [], + "own_tokenizer": false }, { "name": "Shan", "iso_1_code": null, "iso_3_code": "shn", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4732", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false }, { "name": "Thai Song", "iso_1_code": null, "iso_3_code": "soa", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4733", + "scripts": [], + "own_tokenizer": false }, { "name": "Thai, Southern", "iso_1_code": null, "iso_3_code": "sou", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4734", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai N\u00fca", "iso_1_code": null, "iso_3_code": "tdd", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4735", + "scripts": [], + "own_tokenizer": false }, { "name": "Thai", "iso_1_code": "th", "iso_3_code": "tha", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "4736", + "scripts": [ + "Thai" + ], + "own_tokenizer": true }, { "name": "Tai Long", "iso_1_code": null, "iso_3_code": "thi", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4737", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Hongjin", "iso_1_code": null, "iso_3_code": "tiz", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4738", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Laing", "iso_1_code": null, "iso_3_code": "tjl", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4739", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Thanh", "iso_1_code": null, "iso_3_code": "tmm", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4740", + "scripts": [], + "own_tokenizer": false }, { "name": "Thai, Northeastern", "iso_1_code": null, "iso_3_code": "tts", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4741", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai D\u00f3n", "iso_1_code": null, "iso_3_code": "twh", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4742", + "scripts": [], + "own_tokenizer": false }, { "name": "Thu Lao", "iso_1_code": null, "iso_3_code": "tyl", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4743", + "scripts": [], + "own_tokenizer": false }, { "name": "Tai Daeng", "iso_1_code": null, "iso_3_code": "tyr", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4744", + "scripts": [], + "own_tokenizer": false }, { "name": "T\u00e0y Sa Pa", "iso_1_code": null, "iso_3_code": "tys", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4745", + "scripts": [], + "own_tokenizer": false }, { "name": "T\u00e0y Tac", "iso_1_code": null, "iso_3_code": "tyt", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4746", + "scripts": [], + "own_tokenizer": false }, { "name": "Yong", "iso_1_code": null, "iso_3_code": "yno", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4747", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4716", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4686", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4669", + "scripts": [], + "own_tokenizer": false }, { "name": "Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buyang, Baha", "iso_1_code": null, "iso_3_code": "yha", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4750", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4749", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cun", "iso_1_code": null, "iso_3_code": "cuq", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4752", + "scripts": [], + "own_tokenizer": false }, { "name": "En", "iso_1_code": null, "iso_3_code": "enc", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4753", + "scripts": [], + "own_tokenizer": false }, { "name": "Qabiao", "iso_1_code": null, "iso_3_code": "laq", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4754", + "scripts": [], + "own_tokenizer": false }, { "name": "Laha", "iso_1_code": null, "iso_3_code": "lha", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4755", + "scripts": [], + "own_tokenizer": false }, { "name": "Buyang, Langnian", "iso_1_code": null, "iso_3_code": "yln", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4756", + "scripts": [], + "own_tokenizer": false }, { "name": "Yerong", "iso_1_code": null, "iso_3_code": "yrn", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4757", + "scripts": [], + "own_tokenizer": false }, { "name": "Buyang, E\u2019ma", "iso_1_code": null, "iso_3_code": "yzg", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4758", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4751", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "scripts": [ + "Thai" + ], + "class_name": "ThaiTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A\u2019ou", "iso_1_code": null, "iso_3_code": "aou", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4760", + "scripts": [], + "own_tokenizer": false }, { "name": "Gelao, Green", "iso_1_code": null, "iso_3_code": "giq", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4761", + "scripts": [], + "own_tokenizer": false }, { "name": "Gelao, Red", "iso_1_code": null, "iso_3_code": "gir", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4762", + "scripts": [], + "own_tokenizer": false }, { "name": "Mulao", "iso_1_code": null, "iso_3_code": "giu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4763", + "scripts": [], + "own_tokenizer": false }, { "name": "Duoluo", "iso_1_code": null, "iso_3_code": "giw", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4764", + "scripts": [], + "own_tokenizer": false }, { "name": "Qau", "iso_1_code": null, "iso_3_code": "gqu", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4765", + "scripts": [], + "own_tokenizer": false }, { "name": "Lachi", "iso_1_code": null, "iso_3_code": "lbt", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4766", + "scripts": [], + "own_tokenizer": false }, { "name": "Lachi, White", "iso_1_code": null, "iso_3_code": "lwh", - "tokenizer": { - "name": "thai", - "tokenizer": "ThaiTokenizer()" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4767", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4759", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4748", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4664", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kuki-Chin-Naga.json b/data/Kuki-Chin-Naga.json index ae9d96e4292acf9c608c02faf9ef2aef8546cfc8..ac73b4a49e125058501c527d216b3ff96e33466e 100644 --- a/data/Kuki-Chin-Naga.json +++ b/data/Kuki-Chin-Naga.json @@ -2,7 +2,9 @@ "name": "Kuki-Chin-Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4768", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Kwomtari.json b/data/Kwomtari.json index 30013347e2b0984f25b1d810af746686601b300e..39c376abb2ac9b210c69a1067f3aaf046cf90e60 100644 --- a/data/Kwomtari.json +++ b/data/Kwomtari.json @@ -2,50 +2,62 @@ "name": "Kwomtari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guriaso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Muno", "iso_1_code": null, "iso_3_code": "grx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4771", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4770", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Kwomtari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nai", "iso_1_code": null, "iso_3_code": "bio", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4773", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwomtari", "iso_1_code": null, "iso_3_code": "kwo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4774", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4772", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4769", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/Kx\342\200\231a.json" "b/data/Kx\342\200\231a.json" index 09d43b5c8977905893700ef34da4273b838d8727..fb4f3535b0505a73b8df5d0939148926c6af0feb 100644 --- "a/data/Kx\342\200\231a.json" +++ "b/data/Kx\342\200\231a.json" @@ -2,49 +2,63 @@ "name": "Kx\u2019a", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "\u01c2\u2019Amkhoe", "iso_1_code": null, "iso_3_code": "huc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4776", + "scripts": [], + "own_tokenizer": false }, { "name": "!Kung", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kung-Ekoka", "iso_1_code": null, "iso_3_code": "knw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4778", + "scripts": [], + "own_tokenizer": false }, { "name": "Ju\u01c0\u2019hoansi", "iso_1_code": null, "iso_3_code": "ktz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4779", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Northwestern !Kung", "iso_1_code": null, "iso_3_code": "vaj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4780", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4777", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4775", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Lakes Plain.json b/data/Lakes Plain.json index c149f1f38f0b304db0913f7dd00ecd2eaea1a1a9..9d784e801578d7db814977d733ced82eb5934b1b 100644 --- a/data/Lakes Plain.json +++ b/data/Lakes Plain.json @@ -2,240 +2,298 @@ "name": "Lakes Plain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awera", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awera", "iso_1_code": null, "iso_3_code": "awr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4783", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4782", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Foau", "iso_1_code": null, "iso_3_code": "flh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4785", + "scripts": [], + "own_tokenizer": false }, { "name": "Taburta", "iso_1_code": null, "iso_3_code": "tbp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4786", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4784", + "scripts": [], + "own_tokenizer": false }, { "name": "Rasawa-Saponi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Rasawa", "iso_1_code": null, "iso_3_code": "rac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4788", + "scripts": [], + "own_tokenizer": false }, { "name": "Saponi", "iso_1_code": null, "iso_3_code": "spi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4789", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4787", + "scripts": [], + "own_tokenizer": false }, { "name": "Tariku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Edopi", "iso_1_code": null, "iso_3_code": "dbf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4792", + "scripts": [], + "own_tokenizer": false }, { "name": "Iau", "iso_1_code": null, "iso_3_code": "tmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4793", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4791", + "scripts": [], + "own_tokenizer": false }, { "name": "Duvle", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Duvle", "iso_1_code": null, "iso_3_code": "duv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4795", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4794", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Obokuitai", "iso_1_code": null, "iso_3_code": "afz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4797", + "scripts": [], + "own_tokenizer": false }, { "name": "Biritai", "iso_1_code": null, "iso_3_code": "bqq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4798", + "scripts": [], + "own_tokenizer": false }, { "name": "Eritai", "iso_1_code": null, "iso_3_code": "ert", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4799", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwerisa", "iso_1_code": null, "iso_3_code": "kkb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4800", + "scripts": [], + "own_tokenizer": false }, { "name": "Papasena", "iso_1_code": null, "iso_3_code": "pas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4801", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaiy", "iso_1_code": null, "iso_3_code": "tcq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4802", + "scripts": [], + "own_tokenizer": false }, { "name": "Doutai", "iso_1_code": null, "iso_3_code": "tds", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4803", + "scripts": [], + "own_tokenizer": false }, { "name": "Sikaritai", "iso_1_code": null, "iso_3_code": "tty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4804", + "scripts": [], + "own_tokenizer": false }, { "name": "Waritai", "iso_1_code": null, "iso_3_code": "wbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4805", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4796", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fayu", "iso_1_code": null, "iso_3_code": "fau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4807", + "scripts": [], + "own_tokenizer": false }, { "name": "Kirikiri", "iso_1_code": null, "iso_3_code": "kiy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4808", + "scripts": [], + "own_tokenizer": false }, { "name": "Tause", "iso_1_code": null, "iso_3_code": "tad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4809", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4806", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4790", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4781", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Language isolate.json b/data/Language isolate.json index 0dd28fb098c1f1168d31411df97d7fbc5b795a75..2f34a8545c4589160bfabca4cc6e84edc4dd12d6 100644 --- a/data/Language isolate.json +++ b/data/Language isolate.json @@ -2,1155 +2,1256 @@ "name": "Language isolate", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ainu", "iso_1_code": null, "iso_3_code": "ain", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4811", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mpur", "iso_1_code": null, "iso_3_code": "akc", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4812", + "scripts": [], + "own_tokenizer": false }, { "name": "Andoque", "iso_1_code": null, "iso_3_code": "ano", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4813", + "scripts": [], + "own_tokenizer": false }, { "name": "Atakapa", "iso_1_code": null, "iso_3_code": "aqp", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4814", + "scripts": [], + "own_tokenizer": false }, { "name": "Arutani", "iso_1_code": null, "iso_3_code": "atx", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4815", + "scripts": [], + "own_tokenizer": false }, { "name": "Waorani", "iso_1_code": null, "iso_3_code": "auc", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4816", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Aushiri", "iso_1_code": null, "iso_3_code": "avs", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4817", + "scripts": [], + "own_tokenizer": false }, { "name": "Odiai", "iso_1_code": null, "iso_3_code": "bhf", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4818", + "scripts": [], + "own_tokenizer": false }, { "name": "Abinomn", "iso_1_code": null, "iso_3_code": "bsa", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4819", + "scripts": [], + "own_tokenizer": false }, { "name": "Burushaski", "iso_1_code": null, "iso_3_code": "bsk", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4820", + "scripts": [], + "own_tokenizer": false }, { "name": "Callawalla", "iso_1_code": null, "iso_3_code": "caw", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4821", + "scripts": [], + "own_tokenizer": false }, { "name": "Chiquitano", "iso_1_code": null, "iso_3_code": "cax", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4822", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Canichana", "iso_1_code": null, "iso_3_code": "caz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4823", + "scripts": [], + "own_tokenizer": false }, { "name": "Kandozi-Chapra", "iso_1_code": null, "iso_3_code": "cbu", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4824", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cent\u00fa\u00fam", "iso_1_code": null, "iso_3_code": "cet", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4825", + "scripts": [], + "own_tokenizer": false }, { "name": "Chimariko", "iso_1_code": null, "iso_3_code": "cid", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4826", + "scripts": [], + "own_tokenizer": false }, { "name": "Cof\u00e1n", "iso_1_code": null, "iso_3_code": "con", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4827", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chitimacha", "iso_1_code": null, "iso_3_code": "ctm", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4828", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuitlatec", "iso_1_code": null, "iso_3_code": "cuy", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4829", + "scripts": [], + "own_tokenizer": false }, { "name": "Cayubaba", "iso_1_code": null, "iso_3_code": "cyb", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4830", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangime", "iso_1_code": null, "iso_3_code": "dba", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4831", + "scripts": [], + "own_tokenizer": false }, { "name": "Esselen", "iso_1_code": null, "iso_3_code": "esq", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4832", + "scripts": [], + "own_tokenizer": false }, { "name": "Basque", "iso_1_code": "eu", "iso_3_code": "eus", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "own", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4833", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Iat\u00ea", "iso_1_code": null, "iso_3_code": "fun", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4834", + "scripts": [], + "own_tokenizer": false }, { "name": "Laal", "iso_1_code": null, "iso_3_code": "gdm", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4835", + "scripts": [], + "own_tokenizer": false }, { "name": "Tayap", "iso_1_code": null, "iso_3_code": "gpn", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4836", + "scripts": [], + "own_tokenizer": false }, { "name": "Guat\u00f3", "iso_1_code": null, "iso_3_code": "gta", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4837", + "scripts": [], + "own_tokenizer": false }, { "name": "Hatam", "iso_1_code": null, "iso_3_code": "had", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4838", + "scripts": [], + "own_tokenizer": false }, { "name": "Hadza", "iso_1_code": null, "iso_3_code": "hts", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4839", + "scripts": [], + "own_tokenizer": false }, { "name": "Ir\u00e1ntxe", "iso_1_code": null, "iso_3_code": "irn", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4840", + "scripts": [], + "own_tokenizer": false }, { "name": "Itonama", "iso_1_code": null, "iso_3_code": "ito", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4841", + "scripts": [], + "own_tokenizer": false }, { "name": "Cams\u00e1", "iso_1_code": null, "iso_3_code": "kbh", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4842", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kusunda", "iso_1_code": null, "iso_3_code": "kgg", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4843", + "scripts": [], + "own_tokenizer": false }, { "name": "Abun", "iso_1_code": null, "iso_3_code": "kgr", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4844", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Klamath-Modoc", "iso_1_code": null, "iso_3_code": "kla", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4845", + "scripts": [], + "own_tokenizer": false }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "kol", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4846", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuot", "iso_1_code": null, "iso_3_code": "kto", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4847", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kutenai", "iso_1_code": null, "iso_3_code": "kut", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4848", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunza", "iso_1_code": null, "iso_3_code": "kuz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4849", + "scripts": [], + "own_tokenizer": false }, { "name": "Kano\u00e9", "iso_1_code": null, "iso_3_code": "kxo", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4850", + "scripts": [], + "own_tokenizer": false }, { "name": "Karok", "iso_1_code": null, "iso_3_code": "kyh", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4851", + "scripts": [], + "own_tokenizer": false }, { "name": "Karir\u00ed-Xoc\u00f3", "iso_1_code": null, "iso_3_code": "kzw", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4852", + "scripts": [], + "own_tokenizer": false }, { "name": "Leco", "iso_1_code": null, "iso_3_code": "lec", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4853", + "scripts": [], + "own_tokenizer": false }, { "name": "Molale", "iso_1_code": null, "iso_3_code": "mbe", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4854", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawes", "iso_1_code": null, "iso_3_code": "mgk", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4855", + "scripts": [], + "own_tokenizer": false }, { "name": "Elseng", "iso_1_code": null, "iso_3_code": "mrf", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4856", + "scripts": [], + "own_tokenizer": false }, { "name": "Massep", "iso_1_code": null, "iso_3_code": "mvs", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4857", + "scripts": [], + "own_tokenizer": false }, { "name": "Muniche", "iso_1_code": null, "iso_3_code": "myr", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4858", + "scripts": [], + "own_tokenizer": false }, { "name": "Movima", "iso_1_code": null, "iso_3_code": "mzp", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4859", + "scripts": [], + "own_tokenizer": false }, { "name": "Yale", "iso_1_code": null, "iso_3_code": "nce", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4860", + "scripts": [], + "own_tokenizer": false }, { "name": "Natchez", "iso_1_code": null, "iso_3_code": "ncz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4861", + "scripts": [], + "own_tokenizer": false }, { "name": "Gilyak", "iso_1_code": null, "iso_3_code": "niv", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4862", + "scripts": [], + "own_tokenizer": false }, { "name": "Nihali", "iso_1_code": null, "iso_3_code": "nll", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4863", + "scripts": [], + "own_tokenizer": false }, { "name": "Mochica", "iso_1_code": null, "iso_3_code": "omc", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4864", + "scripts": [], + "own_tokenizer": false }, { "name": "Omurano", "iso_1_code": null, "iso_3_code": "omu", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4865", + "scripts": [], + "own_tokenizer": false }, { "name": "Ofay\u00e9", "iso_1_code": null, "iso_3_code": "opy", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4866", + "scripts": [], + "own_tokenizer": false }, { "name": "Oti", "iso_1_code": null, "iso_3_code": "oti", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4867", + "scripts": [], + "own_tokenizer": false }, { "name": "Pankarar\u00fa", "iso_1_code": null, "iso_3_code": "paz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4868", + "scripts": [], + "own_tokenizer": false }, { "name": "Pyu", "iso_1_code": null, "iso_3_code": "pby", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4869", + "scripts": [], + "own_tokenizer": false }, { "name": "Puelche", "iso_1_code": null, "iso_3_code": "pue", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4870", + "scripts": [], + "own_tokenizer": false }, { "name": "Puquina", "iso_1_code": null, "iso_3_code": "puq", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4871", + "scripts": [], + "own_tokenizer": false }, { "name": "Rikbaktsa", "iso_1_code": null, "iso_3_code": "rkb", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4872", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sandawe", "iso_1_code": null, "iso_3_code": "sad", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4873", + "scripts": [], + "own_tokenizer": false }, { "name": "Seri", "iso_1_code": null, "iso_3_code": "sei", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4874", + "scripts": [], + "own_tokenizer": false }, { "name": "Shasta", "iso_1_code": null, "iso_3_code": "sht", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4875", + "scripts": [], + "own_tokenizer": false }, { "name": "Siuslaw", "iso_1_code": null, "iso_3_code": "sis", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4876", + "scripts": [], + "own_tokenizer": false }, { "name": "Salinan", "iso_1_code": null, "iso_3_code": "sln", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4877", + "scripts": [], + "own_tokenizer": false }, { "name": "Sap\u00e9", "iso_1_code": null, "iso_3_code": "spc", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4878", + "scripts": [], + "own_tokenizer": false }, { "name": "Sulka", "iso_1_code": null, "iso_3_code": "sua", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4879", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Aikan\u00e3", "iso_1_code": null, "iso_3_code": "tba", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4880", + "scripts": [], + "own_tokenizer": false }, { "name": "Ticuna", "iso_1_code": null, "iso_3_code": "tca", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4881", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Taruma", "iso_1_code": null, "iso_3_code": "tdm", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4882", + "scripts": [], + "own_tokenizer": false }, { "name": "Timucua", "iso_1_code": null, "iso_3_code": "tjm", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4883", + "scripts": [], + "own_tokenizer": false }, { "name": "Trumai", "iso_1_code": null, "iso_3_code": "tpy", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4884", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonkawa", "iso_1_code": null, "iso_3_code": "tqw", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4885", + "scripts": [], + "own_tokenizer": false }, { "name": "Taushiro", "iso_1_code": null, "iso_3_code": "trr", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4886", + "scripts": [], + "own_tokenizer": false }, { "name": "Tux\u00e1", "iso_1_code": null, "iso_3_code": "tud", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4887", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunica", "iso_1_code": null, "iso_3_code": "tun", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4888", + "scripts": [], + "own_tokenizer": false }, { "name": "Uamu\u00e9", "iso_1_code": null, "iso_3_code": "uam", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4889", + "scripts": [], + "own_tokenizer": false }, { "name": "Urarina", "iso_1_code": null, "iso_3_code": "ura", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4890", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vilela", "iso_1_code": null, "iso_3_code": "vil", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4891", + "scripts": [], + "own_tokenizer": false }, { "name": "Washo", "iso_1_code": null, "iso_3_code": "was", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4892", + "scripts": [], + "own_tokenizer": false }, { "name": "Warao", "iso_1_code": null, "iso_3_code": "wba", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4893", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "M\u00e1ku", "iso_1_code": null, "iso_3_code": "xak", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4894", + "scripts": [], + "own_tokenizer": false }, { "name": "Cayuse", "iso_1_code": null, "iso_3_code": "xcy", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4895", + "scripts": [], + "own_tokenizer": false }, { "name": "Xinca", "iso_1_code": null, "iso_3_code": "xin", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4896", + "scripts": [], + "own_tokenizer": false }, { "name": "Xukur\u00fa", "iso_1_code": null, "iso_3_code": "xoo", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4897", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Northeastern", "iso_1_code": null, "iso_3_code": "xpb", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4898", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Oyster Bay", "iso_1_code": null, "iso_3_code": "xpd", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4899", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Southeast", "iso_1_code": null, "iso_3_code": "xpf", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4900", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, North Midlands", "iso_1_code": null, "iso_3_code": "xph", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4901", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Port Sorell", "iso_1_code": null, "iso_3_code": "xpl", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4902", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Northern", "iso_1_code": null, "iso_3_code": "xpv", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4903", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Northwestern", "iso_1_code": null, "iso_3_code": "xpw", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4904", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Southwestern", "iso_1_code": null, "iso_3_code": "xpx", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4905", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasmanian, Bruny Island", "iso_1_code": null, "iso_3_code": "xpz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4906", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwaza", "iso_1_code": null, "iso_3_code": "xwa", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4907", + "scripts": [], + "own_tokenizer": false }, { "name": "Y\u00e1mana", "iso_1_code": null, "iso_3_code": "yag", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4908", + "scripts": [], + "own_tokenizer": false }, { "name": "Hod\u00ef", "iso_1_code": null, "iso_3_code": "yau", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4909", + "scripts": [], + "own_tokenizer": false }, { "name": "Yana", "iso_1_code": null, "iso_3_code": "ynn", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4910", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuchi", "iso_1_code": null, "iso_3_code": "yuc", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4911", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuracare", "iso_1_code": null, "iso_3_code": "yuz", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "4912", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Karankawa", "iso_1_code": null, "iso_3_code": "zkk", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4913", + "scripts": [], + "own_tokenizer": false }, { "name": "Zuni", "iso_1_code": null, "iso_3_code": "zun", - "tokenizer": { - "name": "basque", - "tokenizer": "SpaCyTokenizer(\"eu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4914", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4810", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Lencan.json b/data/Lencan.json index e24413758109ccdb18ed9720d15a9031eadd78a5..775ab43a69f0fcf2935aed50dd36bd00f468bfc5 100644 --- a/data/Lencan.json +++ b/data/Lencan.json @@ -2,16 +2,20 @@ "name": "Lencan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lenca", "iso_1_code": null, "iso_3_code": "len", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4916", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4915", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Lower Mamberamo.json b/data/Lower Mamberamo.json index 4a7ff08ca46ba0ef135685cc9cbc09a39a7313e5..daec1ff124f57da93c2d3930b8ed9c4980df2819 100644 --- a/data/Lower Mamberamo.json +++ b/data/Lower Mamberamo.json @@ -2,24 +2,30 @@ "name": "Lower Mamberamo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Warembori", "iso_1_code": null, "iso_3_code": "wsa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4918", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoke", "iso_1_code": null, "iso_3_code": "yki", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4919", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4917", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Maiduan.json b/data/Maiduan.json index 771b04cf3901778ececb22436a4fed32ae1fd584..9b7732c55544daca151c8630ac55d1284faa4672 100644 --- a/data/Maiduan.json +++ b/data/Maiduan.json @@ -2,49 +2,61 @@ "name": "Maiduan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maidu, Northwest", "iso_1_code": null, "iso_3_code": "mjd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4921", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisenan", "iso_1_code": null, "iso_3_code": "nsz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4922", + "scripts": [], + "own_tokenizer": false }, { "name": "Maidu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maidu, Northeast", "iso_1_code": null, "iso_3_code": "nmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4924", + "scripts": [], + "own_tokenizer": false }, { "name": "Maidu, Valley", "iso_1_code": null, "iso_3_code": "vmv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4925", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4923", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4920", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Maipurean.json b/data/Maipurean.json index 7d831f32dbcc1b7e76c9ebe64bbab6129f80d51b..8d161b1d965323bb2161d176207817ed16c198f1 100644 --- a/data/Maipurean.json +++ b/data/Maipurean.json @@ -2,761 +2,993 @@ "name": "Maipurean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Palikur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Palik\u00far", "iso_1_code": null, "iso_3_code": "plu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4930", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4929", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4928", + "scripts": [], + "own_tokenizer": false }, { "name": "Maritime", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ta-Maipurean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arawak", "iso_1_code": null, "iso_3_code": "arw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4933", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayuu", "iso_1_code": null, "iso_3_code": "guc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4934", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Paraujano", "iso_1_code": null, "iso_3_code": "pbg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4935", + "scripts": [], + "own_tokenizer": false }, { "name": "Ta\u00edno", "iso_1_code": null, "iso_3_code": "tnq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4936", + "scripts": [], + "own_tokenizer": false }, { "name": "I\u00f1eri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Garifuna", "iso_1_code": null, "iso_3_code": "cab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4938", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Island Carib", "iso_1_code": null, "iso_3_code": "crb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4939", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4937", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4932", + "scripts": [], + "own_tokenizer": false }, { "name": "Wapixana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Atorada", "iso_1_code": null, "iso_3_code": "aox", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4941", + "scripts": [], + "own_tokenizer": false }, { "name": "Mapidian", "iso_1_code": null, "iso_3_code": "mpw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4942", + "scripts": [], + "own_tokenizer": false }, { "name": "Wapishana", "iso_1_code": null, "iso_3_code": "wap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4943", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4940", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4931", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Amazon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Res\u00edgaro", "iso_1_code": null, "iso_3_code": "rgr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4945", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Upper Amazon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bar\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bar\u00e9", "iso_1_code": null, "iso_3_code": "bae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4947", + "scripts": [], + "own_tokenizer": false }, { "name": "Yavitero", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baniva", "iso_1_code": null, "iso_3_code": "bvv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4950", + "scripts": [], + "own_tokenizer": false }, { "name": "Yavitero", "iso_1_code": null, "iso_3_code": "yvt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4951", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4949", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4946", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Nawiki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tariana", "iso_1_code": null, "iso_3_code": "tae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4953", + "scripts": [], + "own_tokenizer": false }, { "name": "Karu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baniwa", "iso_1_code": null, "iso_3_code": "bwi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4955", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Curripaco", "iso_1_code": null, "iso_3_code": "kpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4956", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4954", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4952", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Xiri\u00e2na", "iso_1_code": null, "iso_3_code": "xir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4958", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaba\u00e2na", "iso_1_code": null, "iso_3_code": "ybn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4959", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4957", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Nawiki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cabiyar\u00ed", "iso_1_code": null, "iso_3_code": "cbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4961", + "scripts": [], + "own_tokenizer": false }, { "name": "Yucuna", "iso_1_code": null, "iso_3_code": "ycn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4962", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Piapoco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Achagua", "iso_1_code": null, "iso_3_code": "aca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4964", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Piapoco", "iso_1_code": null, "iso_3_code": "pio", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4965", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4963", + "scripts": [], + "own_tokenizer": false }, { "name": "Warekena", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guarequena", "iso_1_code": null, "iso_3_code": "gae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4967", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandahuaca", "iso_1_code": null, "iso_3_code": "mht", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4968", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4966", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4944", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4927", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Campa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ash\u00e1ninka", "iso_1_code": null, "iso_3_code": "cni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4971", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ash\u00e9ninga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ash\u00e9ninka, Pajonal", "iso_1_code": null, "iso_3_code": "cjo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4973", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nanti", "iso_1_code": null, "iso_3_code": "cox", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4974", + "scripts": [], + "own_tokenizer": false }, { "name": "Ash\u00e9ninka, Ucayali-Yur\u00faa", "iso_1_code": null, "iso_3_code": "cpb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4975", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ajy\u00edninka Apurucayali", "iso_1_code": null, "iso_3_code": "cpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4976", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ash\u00e9ninka, Pichis", "iso_1_code": null, "iso_3_code": "cpu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4977", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ash\u00e9ninka, South Ucayali", "iso_1_code": null, "iso_3_code": "cpy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4978", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nomatsigenga", "iso_1_code": null, "iso_3_code": "not", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4979", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ash\u00e9ninka, Peren\u00e9", "iso_1_code": null, "iso_3_code": "prq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4980", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4972", + "scripts": [], + "own_tokenizer": false }, { "name": "Machiguenga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Caquinte", "iso_1_code": null, "iso_3_code": "cot", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4982", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Matsigenka", "iso_1_code": null, "iso_3_code": "mcb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4983", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4981", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4970", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pares\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Parec\u00eds", "iso_1_code": null, "iso_3_code": "pab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4986", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Saraveca", "iso_1_code": null, "iso_3_code": "sar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4987", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4985", + "scripts": [], + "own_tokenizer": false }, { "name": "Waur\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mehin\u00e1ku", "iso_1_code": null, "iso_3_code": "mmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4989", + "scripts": [], + "own_tokenizer": false }, { "name": "Waur\u00e1-Meinaku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Waur\u00e1", "iso_1_code": null, "iso_3_code": "wau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4991", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawalapit\u00ed", "iso_1_code": null, "iso_3_code": "yaw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4990", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4988", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4984", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Outlier", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mojo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baure", "iso_1_code": null, "iso_3_code": "brg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4995", + "scripts": [], + "own_tokenizer": false }, { "name": "Paunaka", "iso_1_code": null, "iso_3_code": "pnk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4996", + "scripts": [], + "own_tokenizer": false }, { "name": "Mojo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ignaciano", "iso_1_code": null, "iso_3_code": "ign", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4998", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Trinitario", "iso_1_code": null, "iso_3_code": "trn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "4999", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "4997", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4994", + "scripts": [], + "own_tokenizer": false }, { "name": "Piro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apurin\u00e3", "iso_1_code": null, "iso_3_code": "apu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5001", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "I\u00f1apari", "iso_1_code": null, "iso_3_code": "inp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5002", + "scripts": [], + "own_tokenizer": false }, { "name": "Piro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mashco Piro", "iso_1_code": null, "iso_3_code": "cuj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5004", + "scripts": [], + "own_tokenizer": false }, { "name": "Machinere", "iso_1_code": null, "iso_3_code": "mpd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5005", + "scripts": [], + "own_tokenizer": false }, { "name": "Yine", "iso_1_code": null, "iso_3_code": "pib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5006", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5003", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5000", + "scripts": [], + "own_tokenizer": false }, { "name": "Terena", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chan\u00e9", "iso_1_code": null, "iso_3_code": "caj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5008", + "scripts": [], + "own_tokenizer": false }, { "name": "Guana", "iso_1_code": null, "iso_3_code": "gqn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5009", + "scripts": [], + "own_tokenizer": false }, { "name": "Ter\u00eana", "iso_1_code": null, "iso_3_code": "ter", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5010", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5007", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4993", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yanesha\u2019", "iso_1_code": null, "iso_3_code": "ame", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5012", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chamicuro", "iso_1_code": null, "iso_3_code": "ccc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5013", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5011", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4969", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mawayana", "iso_1_code": null, "iso_3_code": "mzx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5015", + "scripts": [], + "own_tokenizer": false }, { "name": "Enawen\u00e9-Naw\u00e9", "iso_1_code": null, "iso_3_code": "unk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5016", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5014", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "4926", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mairasi.json b/data/Mairasi.json index 751b5f90d238bec310c936993fc1b3986dc70d61..441a8d19bc3f84a196cca6e8a5cf97e6ef06e3ae 100644 --- a/data/Mairasi.json +++ b/data/Mairasi.json @@ -2,32 +2,40 @@ "name": "Mairasi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Semimi", "iso_1_code": null, "iso_3_code": "etz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5018", + "scripts": [], + "own_tokenizer": false }, { "name": "Mer", "iso_1_code": null, "iso_3_code": "mnu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5019", + "scripts": [], + "own_tokenizer": false }, { "name": "Mairasi", "iso_1_code": null, "iso_3_code": "zrs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5020", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5017", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mapudungu.json b/data/Mapudungu.json index 2a53e0e4c4a95a3f760c11410ad39012df1ef93a..6688ab007793c0cb9b453ca9da047aefbece3d4e 100644 --- a/data/Mapudungu.json +++ b/data/Mapudungu.json @@ -2,24 +2,32 @@ "name": "Mapudungu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mapudungun", "iso_1_code": null, "iso_3_code": "arn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5022", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Huilliche", "iso_1_code": null, "iso_3_code": "huh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5023", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5021", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mascoyan.json b/data/Mascoyan.json index b78095f18fb09e91366f7a8f1e1c439ab823f5a9..50b5b1b81b532a2dce7e80adb3a1df535d20ff05 100644 --- a/data/Mascoyan.json +++ b/data/Mascoyan.json @@ -2,65 +2,85 @@ "name": "Mascoyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angait\u00e9", "iso_1_code": null, "iso_3_code": "aqt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5025", + "scripts": [], + "own_tokenizer": false }, { "name": "Enlhet", "iso_1_code": null, "iso_3_code": "enl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5026", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Enxet", "iso_1_code": null, "iso_3_code": "enx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5027", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guana", "iso_1_code": null, "iso_3_code": "gva", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5028", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanapan\u00e1", "iso_1_code": null, "iso_3_code": "spn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5029", + "scripts": [], + "own_tokenizer": false }, { "name": "Mascoy", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Toba-Maskoy", "iso_1_code": null, "iso_3_code": "tmf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5031", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5030", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5024", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Matacoan.json b/data/Matacoan.json index 3066afda37fa01e70cccb355c966a760ac82c88f..d40054d9a0d186621c009507cdb35a1ccf0447aa 100644 --- a/data/Matacoan.json +++ b/data/Matacoan.json @@ -2,82 +2,114 @@ "name": "Matacoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nivacl\u00e9", "iso_1_code": null, "iso_3_code": "cag", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5033", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maka", "iso_1_code": null, "iso_3_code": "mca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5034", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chorote", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chorote, Iyo\u2019wujwa", "iso_1_code": null, "iso_3_code": "crq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5036", + "scripts": [], + "own_tokenizer": false }, { "name": "Chorote, Iyojwa\u2019ja", "iso_1_code": null, "iso_3_code": "crt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5037", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5035", + "scripts": [], + "own_tokenizer": false }, { "name": "Mataco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Weenhayek", "iso_1_code": null, "iso_3_code": "mtp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5039", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pilcomayo Wich\u00ed", "iso_1_code": null, "iso_3_code": "mzh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5040", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bermejo Wich\u00ed", "iso_1_code": null, "iso_3_code": "wlv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5041", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5032", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Maxakalian.json b/data/Maxakalian.json index 14f3b1697902de7d7f5ae0cb27d1868502cae4bd..345b7743b6c9bcbbb29d927600ca4ef29b4c79c9 100644 --- a/data/Maxakalian.json +++ b/data/Maxakalian.json @@ -2,24 +2,32 @@ "name": "Maxakalian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maxakal\u00ed", "iso_1_code": null, "iso_3_code": "mbl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5043", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Patax\u00f3 H\u00e3-Ha-H\u00e3e", "iso_1_code": null, "iso_3_code": "pth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5044", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5042", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mayan.json b/data/Mayan.json index 618a986357bc1f18a1d70c80068516cbf9cf870c..630427a30962a8274daec5b946fc39d38db14ca4 100644 --- a/data/Mayan.json +++ b/data/Mayan.json @@ -2,526 +2,700 @@ "name": "Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huastecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chicomuceltec", "iso_1_code": null, "iso_3_code": "cob", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5047", + "scripts": [], + "own_tokenizer": false }, { "name": "Huastec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huastec", "iso_1_code": null, "iso_3_code": "hus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5049", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5048", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5046", + "scripts": [], + "own_tokenizer": false }, { "name": "Yucatecan-Core Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Core Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cholan-Tzeltalan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cholan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chol-Chontal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chontal, Tabasco", "iso_1_code": null, "iso_3_code": "chf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5055", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chol", "iso_1_code": null, "iso_3_code": "ctu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5057", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5056", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5054", + "scripts": [], + "own_tokenizer": false }, { "name": "Chorti-Cholti", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ch\u2019orti\u2019", "iso_1_code": null, "iso_3_code": "caa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5059", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5058", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5053", + "scripts": [], + "own_tokenizer": false }, { "name": "Tzeltalan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tzeltal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tzeltal", "iso_1_code": null, "iso_3_code": "tzh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5062", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5061", + "scripts": [], + "own_tokenizer": false }, { "name": "Tzotzil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tzotzil", "iso_1_code": null, "iso_3_code": "tzo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5064", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5063", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5060", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5052", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5051", + "scripts": [], + "own_tokenizer": false }, { "name": "K\u2019ichean-Mamean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Q\ua78ceqchi\ua78c", "iso_1_code": null, "iso_3_code": "kek", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5067", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Uspanteko", "iso_1_code": null, "iso_3_code": "usp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5068", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Poqom-K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Core K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Achi", "iso_1_code": null, "iso_3_code": "acr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5071", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "K\u2019iche\u2019", "iso_1_code": null, "iso_3_code": "quc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5072", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sipakapense", "iso_1_code": null, "iso_3_code": "qum", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5073", + "scripts": [], + "own_tokenizer": false }, { "name": "Sakapulteko", "iso_1_code": null, "iso_3_code": "quv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5074", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaqchikel-Tz\u2019utujil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaqchikel", "iso_1_code": null, "iso_3_code": "cak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5076", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tz\u2019utujil", "iso_1_code": null, "iso_3_code": "tzj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5077", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5075", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5070", + "scripts": [], + "own_tokenizer": false }, { "name": "Poqom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Poqomam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Poqomam", "iso_1_code": null, "iso_3_code": "poc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5080", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5079", + "scripts": [], + "own_tokenizer": false }, { "name": "Poqomchi\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Poqomchi\u2019", "iso_1_code": null, "iso_3_code": "poh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5082", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5081", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5078", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5069", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5066", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awakateko-Ixil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awakateko", "iso_1_code": null, "iso_3_code": "agu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5085", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ixil", "iso_1_code": null, "iso_3_code": "ixl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5086", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5084", + "scripts": [], + "own_tokenizer": false }, { "name": "Teco-Mam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mam", "iso_1_code": null, "iso_3_code": "mam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5088", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tektiteko", "iso_1_code": null, "iso_3_code": "ttc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5089", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5087", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5083", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5065", + "scripts": [], + "own_tokenizer": false }, { "name": "Q\u2019anjob\u2019alan-Chujean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chujean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chuj", "iso_1_code": null, "iso_3_code": "cac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5092", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tojolabal", "iso_1_code": null, "iso_3_code": "toj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5093", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5091", + "scripts": [], + "own_tokenizer": false }, { "name": "Q\u2019anjob\u2019alan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mocho", "iso_1_code": null, "iso_3_code": "mhc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5095", + "scripts": [], + "own_tokenizer": false }, { "name": "Q\u2019anjob\u2019al-Akateko-Jakalteko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jakalteko", "iso_1_code": null, "iso_3_code": "jac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5097", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Q\u2019anjob\u2019al", "iso_1_code": null, "iso_3_code": "kjb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5098", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Akateko", "iso_1_code": null, "iso_3_code": "knj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5099", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5096", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5094", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5090", + "scripts": [], + "own_tokenizer": false }, { "name": "Yucatecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mopan-Itz\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Itza\u2019", "iso_1_code": null, "iso_3_code": "itz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5102", + "scripts": [], + "own_tokenizer": false }, { "name": "Maya, Mop\u00e1n", "iso_1_code": null, "iso_3_code": "mop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5103", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5101", + "scripts": [], + "own_tokenizer": false }, { "name": "Yucatec-Lacandon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lacandon", "iso_1_code": null, "iso_3_code": "lac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5105", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maya, Yucatec", "iso_1_code": null, "iso_3_code": "yua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5106", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5104", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5100", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5050", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5045", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Maybrat.json b/data/Maybrat.json index d4a315dafde8702b6fd364cb911099759fe5b938..6a6957c52371729622c6b5416f897c8362d25356 100644 --- a/data/Maybrat.json +++ b/data/Maybrat.json @@ -2,24 +2,30 @@ "name": "Maybrat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mai Brat", "iso_1_code": null, "iso_3_code": "ayz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5108", + "scripts": [], + "own_tokenizer": false }, { "name": "Karon Dori", "iso_1_code": null, "iso_3_code": "kgw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5109", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5107", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Misumalpan.json b/data/Misumalpan.json index fa83f4041608a93a5c5ea940bd4471d96edc4464..38790e7173c5885c074586f67ead19274c7d976a 100644 --- a/data/Misumalpan.json +++ b/data/Misumalpan.json @@ -2,66 +2,86 @@ "name": "Misumalpan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "M\u00edskito", "iso_1_code": null, "iso_3_code": "miq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5111", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ulwa", "iso_1_code": null, "iso_3_code": "ulw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5112", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayangna", "iso_1_code": null, "iso_3_code": "yan", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5113", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sumu-Cacaopera-Matagalpa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cacaopera-Matagalpa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cacaopera", "iso_1_code": null, "iso_3_code": "ccr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5116", + "scripts": [], + "own_tokenizer": false }, { "name": "Matagalpa", "iso_1_code": null, "iso_3_code": "mtn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5117", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5115", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5114", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5110", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Miwok-Costanoan.json b/data/Miwok-Costanoan.json index 8412ba2c87fd6e0fd67289f8f3a24870dcd5dad4..2725f5e5f0706b7eed276957a51fa83833c27708 100644 --- a/data/Miwok-Costanoan.json +++ b/data/Miwok-Costanoan.json @@ -2,133 +2,165 @@ "name": "Miwok-Costanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Costanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ohlone, Southern", "iso_1_code": null, "iso_3_code": "css", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5120", + "scripts": [], + "own_tokenizer": false }, { "name": "Ohlone, Northern", "iso_1_code": null, "iso_3_code": "cst", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5121", + "scripts": [], + "own_tokenizer": false }, { "name": "Karkin", "iso_1_code": null, "iso_3_code": "krb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5122", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5119", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bay Miwok", "iso_1_code": null, "iso_3_code": "mkq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5125", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwok, Plains", "iso_1_code": null, "iso_3_code": "pmw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5126", + "scripts": [], + "own_tokenizer": false }, { "name": "Sierra Miwok", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miwok, Central Sierra", "iso_1_code": null, "iso_3_code": "csm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5128", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwok, Northern Sierra", "iso_1_code": null, "iso_3_code": "nsq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5129", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwok, Southern Sierra", "iso_1_code": null, "iso_3_code": "skd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5130", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5127", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5124", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miwok, Coast", "iso_1_code": null, "iso_3_code": "csi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5132", + "scripts": [], + "own_tokenizer": false }, { "name": "Miwok, Lake", "iso_1_code": null, "iso_3_code": "lmw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5133", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5131", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5123", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5118", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mixe-Zoquean.json b/data/Mixe-Zoquean.json index 35ebec77ff71779c41339017814196cf2eea160a..7a531769ec3b3e4c5a929b8fb039b9e1af1d6459 100644 --- a/data/Mixe-Zoquean.json +++ b/data/Mixe-Zoquean.json @@ -2,225 +2,301 @@ "name": "Mixe-Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Popoluca, Oluta", "iso_1_code": null, "iso_3_code": "plo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5136", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoluca, Sayula", "iso_1_code": null, "iso_3_code": "pos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5137", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Oaxaca Mixean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixe, Totontepec", "iso_1_code": null, "iso_3_code": "mto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5139", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixe, North Central", "iso_1_code": null, "iso_3_code": "neq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5140", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixe, Quetzaltepec", "iso_1_code": null, "iso_3_code": "pxm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5141", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lowland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixe, Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "mco", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5143", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixe, Isthmus", "iso_1_code": null, "iso_3_code": "mir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5144", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixe, Mazatl\u00e1n", "iso_1_code": null, "iso_3_code": "mzl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5145", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5142", + "scripts": [], + "own_tokenizer": false }, { "name": "Midland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixe, Juquila", "iso_1_code": null, "iso_3_code": "mxq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5147", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5146", + "scripts": [], + "own_tokenizer": false }, { "name": "South Highland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixe, Tlahuitoltepec", "iso_1_code": null, "iso_3_code": "mxp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5149", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5138", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5135", + "scripts": [], + "own_tokenizer": false }, { "name": "Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Zoque, Chimalapa", "iso_1_code": null, "iso_3_code": "zoh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5151", + "scripts": [], + "own_tokenizer": false }, { "name": "Chiapas Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Zoque, Copainal\u00e1", "iso_1_code": null, "iso_3_code": "zoc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5153", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zoque, Francisco Le\u00f3n", "iso_1_code": null, "iso_3_code": "zos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5154", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Northeast Zoque", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Zoque, Ray\u00f3n", "iso_1_code": null, "iso_3_code": "zor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5156", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5152", + "scripts": [], + "own_tokenizer": false }, { "name": "Gulf Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Popoluca, Highland", "iso_1_code": null, "iso_3_code": "poi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5158", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Popoluca, Texistepec", "iso_1_code": null, "iso_3_code": "poq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5159", + "scripts": [], + "own_tokenizer": false }, { "name": "Zoque, Tabasco", "iso_1_code": null, "iso_3_code": "zoq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5160", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5157", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5150", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5134", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mixed language.json b/data/Mixed language.json index 9c0727a3a80de1c0d00666f0c453b0584f3b85af..050cccc558d3ef09bd25e62affc2db97773a3174 100644 --- a/data/Mixed language.json +++ b/data/Mixed language.json @@ -2,415 +2,519 @@ "name": "Mixed language", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "E", "iso_1_code": null, "iso_3_code": "eee", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5162", + "scripts": [], + "own_tokenizer": false }, { "name": "N\u2019Ko", "iso_1_code": null, "iso_3_code": "nqo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5163", + "scripts": [ + "Nkoo" + ], + "own_tokenizer": false }, { "name": "Armenian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lomavren", "iso_1_code": null, "iso_3_code": "rmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5165", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5164", + "scripts": [], + "own_tokenizer": false }, { "name": "Bantu-Cushitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mbugu", "iso_1_code": null, "iso_3_code": "mhd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5167", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5166", + "scripts": [], + "own_tokenizer": false }, { "name": "Basque-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Erromintxela", "iso_1_code": null, "iso_3_code": "emx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5168", + "scripts": [], + "own_tokenizer": false }, { "name": "Cakchiquel-Quich\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaqchikel-K\u2019iche\u2019 Mixed Language", "iso_1_code": null, "iso_3_code": "ckz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5171", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5170", + "scripts": [], + "own_tokenizer": false }, { "name": "Cebuano-Spanish-English", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eskayan", "iso_1_code": null, "iso_3_code": "esy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5173", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5172", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinese-Tibetan-Bonan Mongour", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wutunhua", "iso_1_code": null, "iso_3_code": "wuh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5175", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5174", + "scripts": [], + "own_tokenizer": false }, { "name": "Danish-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Traveller Danish", "iso_1_code": null, "iso_3_code": "rmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5177", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5176", + "scripts": [], + "own_tokenizer": false }, { "name": "English-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angloromani", "iso_1_code": null, "iso_3_code": "rme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5179", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5178", + "scripts": [], + "own_tokenizer": false }, { "name": "French-Cree", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Michif", "iso_1_code": null, "iso_3_code": "crg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5181", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5180", + "scripts": [], + "own_tokenizer": false }, { "name": "German-Yiddish-Romani-Rotwelsch", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yeniche", "iso_1_code": null, "iso_3_code": "yec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5183", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5182", + "scripts": [], + "own_tokenizer": false }, { "name": "Greek-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Romano-Greek", "iso_1_code": null, "iso_3_code": "rge", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5185", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5184", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurindji-Kriol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gurindji Kriol", "iso_1_code": null, "iso_3_code": "gjr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5187", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5186", + "scripts": [], + "own_tokenizer": false }, { "name": "Iberian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cal\u00f3", "iso_1_code": null, "iso_3_code": "rmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5189", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5188", + "scripts": [], + "own_tokenizer": false }, { "name": "Irish-undocumented", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Shelta", "iso_1_code": null, "iso_3_code": "sth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5191", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5190", + "scripts": [], + "own_tokenizer": false }, { "name": "Kannada-Malayalam-Tamil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chetti, Moundadan", "iso_1_code": null, "iso_3_code": "cty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5193", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5192", + "scripts": [], + "own_tokenizer": false }, { "name": "Norwegian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Norwegian, Traveller", "iso_1_code": null, "iso_3_code": "rmg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5195", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5194", + "scripts": [], + "own_tokenizer": false }, { "name": "Russian-Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aleut, Mednyj", "iso_1_code": null, "iso_3_code": "mud", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5197", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5196", + "scripts": [], + "own_tokenizer": false }, { "name": "Serbian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Romano-Serbian", "iso_1_code": null, "iso_3_code": "rsb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5199", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5198", + "scripts": [], + "own_tokenizer": false }, { "name": "Songhay-Berber", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tagdal", "iso_1_code": null, "iso_3_code": "tda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5200", + "scripts": [], + "own_tokenizer": false }, { "name": "Spanish-Quichua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Media Lengua", "iso_1_code": null, "iso_3_code": "mue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5203", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5202", + "scripts": [], + "own_tokenizer": false }, { "name": "Swedish-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Romani, Tavringer", "iso_1_code": null, "iso_3_code": "rmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5205", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5204", + "scripts": [], + "own_tokenizer": false }, { "name": "Yapese-Ulithi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nguluwan", "iso_1_code": null, "iso_3_code": "nuw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5207", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5206", + "scripts": [], + "own_tokenizer": false }, { "name": "Zulu-Bantu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Camtho", "iso_1_code": null, "iso_3_code": "cmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5209", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5208", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5161", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mongol-Langam.json b/data/Mongol-Langam.json index 420cbaf4379c1468620fb9cc7de4e0800c366c68..12bcf2a7e9f069e5aadb8666d2fab78548748a60 100644 --- a/data/Mongol-Langam.json +++ b/data/Mongol-Langam.json @@ -2,32 +2,40 @@ "name": "Mongol-Langam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pondi", "iso_1_code": null, "iso_3_code": "lnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5211", + "scripts": [], + "own_tokenizer": false }, { "name": "Mwakai", "iso_1_code": null, "iso_3_code": "mgt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5212", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulwa", "iso_1_code": null, "iso_3_code": "yla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5213", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5210", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mongolic.json b/data/Mongolic.json index 239392c6589e9d7a612684d43a85351e6f9a782a..cbef8a5036fbd9a6024f91104189a1ec439606d7 100644 --- a/data/Mongolic.json +++ b/data/Mongolic.json @@ -2,273 +2,398 @@ "name": "Mongolic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Dagur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Daur", "iso_1_code": null, "iso_3_code": "dta", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5217", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5216", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongour", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kangjia", "iso_1_code": null, "iso_3_code": "kxs", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5219", + "scripts": [], + "own_tokenizer": false }, { "name": "Tu", "iso_1_code": null, "iso_3_code": "mjg", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5220", + "scripts": [], + "own_tokenizer": false }, { "name": "Bonan", "iso_1_code": null, "iso_3_code": "peh", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5221", + "scripts": [], + "own_tokenizer": false }, { "name": "Dongxiang", "iso_1_code": null, "iso_3_code": "sce", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5222", + "scripts": [], + "own_tokenizer": false }, { "name": "Yugur, East", "iso_1_code": null, "iso_3_code": "yuy", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5223", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5218", + "scripts": [], + "own_tokenizer": false }, { "name": "Oirat-Khalkha", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Khamnigan Mongol", "iso_1_code": null, "iso_3_code": "ykh", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5225", + "scripts": [], + "own_tokenizer": false }, { "name": "Khalkha-Buriat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Buriat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Buriat, Mongolia", "iso_1_code": null, "iso_3_code": "bxm", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5228", + "scripts": [], + "own_tokenizer": false }, { "name": "Buriat, Russia", "iso_1_code": null, "iso_3_code": "bxr", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "5229", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Buriat, China", "iso_1_code": null, "iso_3_code": "bxu", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5227", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongolian Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mongolian, Halh", "iso_1_code": "mn", "iso_3_code": "khk", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5232", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Mongolian, Peripheral", "iso_1_code": "mn", "iso_3_code": "mvf", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5233", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5231", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5226", + "scripts": [], + "own_tokenizer": false }, { "name": "Oirat-Kalmyk-Darkhat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kalmyk-Oirat", "iso_1_code": null, "iso_3_code": "xal", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5235", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5234", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5224", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5215", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mogholi", "iso_1_code": null, "iso_3_code": "mhj", - "tokenizer": { - "name": "russia_buriat", - "tokenizer": "StanzaTokenizer(\"bxr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5214", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Mosetenan.json b/data/Mosetenan.json index b983301c076e15eca672fdb1f648461f403483b9..9d72f10dcf46137aad28abc6caca3665b8b36f2f 100644 --- a/data/Mosetenan.json +++ b/data/Mosetenan.json @@ -2,16 +2,22 @@ "name": "Mosetenan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tsiman\u00e9", "iso_1_code": null, "iso_3_code": "cas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5239", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5238", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Muran.json b/data/Muran.json index e9cdcbcbfef7eff26d00849a4a554220f0ea2d3a..c710770e68c724418969fa2b8149379a2afa6030 100644 --- a/data/Muran.json +++ b/data/Muran.json @@ -2,16 +2,20 @@ "name": "Muran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pirah\u00e3", "iso_1_code": null, "iso_3_code": "myp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5241", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5240", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Muskogean.json b/data/Muskogean.json index de77d0f6bfc504292aad6325abe523631394e4de..6e205c6d9c84a575b4052f936ae54d5025b08f04 100644 --- a/data/Muskogean.json +++ b/data/Muskogean.json @@ -2,127 +2,161 @@ "name": "Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apalachee-Alabama-Koasati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apalachee", "iso_1_code": null, "iso_3_code": "xap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5246", + "scripts": [], + "own_tokenizer": false }, { "name": "Alabama-Koasati", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alabama", "iso_1_code": null, "iso_3_code": "akz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5248", + "scripts": [], + "own_tokenizer": false }, { "name": "Koasati", "iso_1_code": null, "iso_3_code": "cku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5247", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5245", + "scripts": [], + "own_tokenizer": false }, { "name": "Hitchiti-Mikasuki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mikasuki", "iso_1_code": null, "iso_3_code": "mik", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5250", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5244", + "scripts": [], + "own_tokenizer": false }, { "name": "Creek-Seminole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Muskogee", "iso_1_code": null, "iso_3_code": "mus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5253", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5243", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Choctaw", "iso_1_code": null, "iso_3_code": "cho", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5255", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chickasaw", "iso_1_code": null, "iso_3_code": "cic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5256", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5242", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Nakh-Daghestanian.json b/data/Nakh-Daghestanian.json index 294909568272be560ff3fa0a1ac36726c6baa3ca..52ef3e9521627ea3cba326bce3658062e1580aa9 100644 --- a/data/Nakh-Daghestanian.json +++ b/data/Nakh-Daghestanian.json @@ -2,427 +2,549 @@ "name": "Nakh-Daghestanian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Avar-Andic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Akhvakh", "iso_1_code": null, "iso_3_code": "akv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5260", + "scripts": [], + "own_tokenizer": false }, { "name": "Andi", "iso_1_code": null, "iso_3_code": "ani", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5261", + "scripts": [], + "own_tokenizer": false }, { "name": "Botlikh", "iso_1_code": null, "iso_3_code": "bph", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5262", + "scripts": [], + "own_tokenizer": false }, { "name": "Chamalal", "iso_1_code": null, "iso_3_code": "cji", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5263", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghodoberi", "iso_1_code": null, "iso_3_code": "gdo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5264", + "scripts": [], + "own_tokenizer": false }, { "name": "Karata", "iso_1_code": null, "iso_3_code": "kpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5265", + "scripts": [], + "own_tokenizer": false }, { "name": "Bagvalal", "iso_1_code": null, "iso_3_code": "kva", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5266", + "scripts": [], + "own_tokenizer": false }, { "name": "Tindi", "iso_1_code": null, "iso_3_code": "tin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5267", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5259", + "scripts": [], + "own_tokenizer": false }, { "name": "Avar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Avar", "iso_1_code": "av", "iso_3_code": "ava", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5269", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5268", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5258", + "scripts": [], + "own_tokenizer": false }, { "name": "Dargi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dargwa", "iso_1_code": null, "iso_3_code": "dar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5271", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Kubachi", "iso_1_code": null, "iso_3_code": "ugh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5272", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaitag", "iso_1_code": null, "iso_3_code": "xdq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5273", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5270", + "scripts": [], + "own_tokenizer": false }, { "name": "Khinalugh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Khinalugh", "iso_1_code": null, "iso_3_code": "kjj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5275", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5274", + "scripts": [], + "own_tokenizer": false }, { "name": "Lak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lak", "iso_1_code": null, "iso_3_code": "lbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5277", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5276", + "scripts": [], + "own_tokenizer": false }, { "name": "Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Archi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Archi", "iso_1_code": null, "iso_3_code": "aqc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5280", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5279", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "East Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aghul", "iso_1_code": null, "iso_3_code": "agx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5283", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Lezgi", "iso_1_code": null, "iso_3_code": "lez", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5284", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Tabasaran", "iso_1_code": null, "iso_3_code": "tab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5285", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5282", + "scripts": [], + "own_tokenizer": false }, { "name": "South Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Budukh", "iso_1_code": null, "iso_3_code": "bdk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5287", + "scripts": [], + "own_tokenizer": false }, { "name": "Kryts", "iso_1_code": null, "iso_3_code": "kry", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5288", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5286", + "scripts": [], + "own_tokenizer": false }, { "name": "West Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Rutul", "iso_1_code": null, "iso_3_code": "rut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5290", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsakhur", "iso_1_code": null, "iso_3_code": "tkr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5291", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5289", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5281", + "scripts": [], + "own_tokenizer": false }, { "name": "Udi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Udi", "iso_1_code": null, "iso_3_code": "udi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5293", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5292", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5278", + "scripts": [], + "own_tokenizer": false }, { "name": "Nakh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Batsi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bats", "iso_1_code": null, "iso_3_code": "bbl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5296", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5295", + "scripts": [], + "own_tokenizer": false }, { "name": "Chechen-Ingush", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chechen", "iso_1_code": "ce", "iso_3_code": "che", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5298", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Ingush", "iso_1_code": null, "iso_3_code": "inh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5299", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5297", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5294", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "East Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hunzib", "iso_1_code": null, "iso_3_code": "huz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5302", + "scripts": [], + "own_tokenizer": false }, { "name": "Bezhta", "iso_1_code": null, "iso_3_code": "kap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5303", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5301", + "scripts": [], + "own_tokenizer": false }, { "name": "West Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dido", "iso_1_code": null, "iso_3_code": "ddo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5305", + "scripts": [], + "own_tokenizer": false }, { "name": "Hinukh", "iso_1_code": null, "iso_3_code": "gin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5306", + "scripts": [], + "own_tokenizer": false }, { "name": "Khvarshi", "iso_1_code": null, "iso_3_code": "khv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5307", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5304", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5300", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5257", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Nambikwara.json b/data/Nambikwara.json index 0708b088a246aeb71515c97d9a652320db589918..8090852b5e04ee5aa5babdc09dab5c1ecbc4d0f6 100644 --- a/data/Nambikwara.json +++ b/data/Nambikwara.json @@ -2,99 +2,125 @@ "name": "Nambikwara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Saban\u00ea", "iso_1_code": null, "iso_3_code": "sae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5309", + "scripts": [], + "own_tokenizer": false }, { "name": "Nambikwara Complex", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nambiku\u00e1ra, Southern", "iso_1_code": null, "iso_3_code": "nab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5311", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alapmunte", "iso_1_code": null, "iso_3_code": "apv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5313", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamaind\u00ea", "iso_1_code": null, "iso_3_code": "wmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5314", + "scripts": [], + "own_tokenizer": false }, { "name": "Yalakalore", "iso_1_code": null, "iso_3_code": "xyl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5315", + "scripts": [], + "own_tokenizer": false }, { "name": "Roosevelt Cluster", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lakond\u00ea", "iso_1_code": null, "iso_3_code": "lkd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5317", + "scripts": [], + "own_tokenizer": false }, { "name": "Latund\u00ea", "iso_1_code": null, "iso_3_code": "ltn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5318", + "scripts": [], + "own_tokenizer": false }, { "name": "Tawand\u00ea", "iso_1_code": null, "iso_3_code": "xtw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5316", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5312", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5310", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5308", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Niger-Congo.json b/data/Niger-Congo.json index 417f6a0842667d752b8482e08fd9ad1ab1b73e58..83e004d2ead51b16b625ccfdd5d63bb59f2fce11 100644 --- a/data/Niger-Congo.json +++ b/data/Niger-Congo.json @@ -2,28788 +2,41958 @@ "name": "Niger-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Atlantic-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bijago", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bijag\u00f3", "iso_1_code": null, "iso_3_code": "bjg", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5323", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Bak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Balant-Ganja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Balanta-Ganja", "iso_1_code": null, "iso_3_code": "bjt", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5328", + "scripts": [], + "own_tokenizer": false }, { "name": "Balanta", "iso_1_code": null, "iso_3_code": "ble", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5329", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5327", + "scripts": [], + "own_tokenizer": false }, { "name": "Jola", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bayot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bayot", "iso_1_code": null, "iso_3_code": "bda", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5332", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5331", + "scripts": [], + "own_tokenizer": false }, { "name": "Jola Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jola Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gusilay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bandial", "iso_1_code": null, "iso_3_code": "bqj", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5336", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gusilay", "iso_1_code": null, "iso_3_code": "gsl", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5337", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5335", + "scripts": [], + "own_tokenizer": false }, { "name": "Her-Ejamat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jola-Felupe", "iso_1_code": null, "iso_3_code": "eja", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5339", + "scripts": [], + "own_tokenizer": false }, { "name": "Kerak", "iso_1_code": null, "iso_3_code": "hhr", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5340", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5338", + "scripts": [], + "own_tokenizer": false }, { "name": "Jola-Fonyi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jola-Fonyi", "iso_1_code": null, "iso_3_code": "dyo", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5342", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5341", + "scripts": [], + "own_tokenizer": false }, { "name": "Jola-Kasa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jola-Kasa", "iso_1_code": null, "iso_3_code": "csk", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5344", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5343", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5334", + "scripts": [], + "own_tokenizer": false }, { "name": "Karon-Mlomp", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karon", "iso_1_code": null, "iso_3_code": "krx", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5346", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mlomp", "iso_1_code": null, "iso_3_code": "mlo", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5347", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5345", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwatay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuwaataay", "iso_1_code": null, "iso_3_code": "cwt", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5349", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5348", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5333", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5330", + "scripts": [], + "own_tokenizer": false }, { "name": "Manjaku-Papel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mankanya", "iso_1_code": null, "iso_3_code": "knf", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5351", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mandjak", "iso_1_code": null, "iso_3_code": "mfv", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5352", + "scripts": [], + "own_tokenizer": false }, { "name": "Papel", "iso_1_code": null, "iso_3_code": "pbo", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5353", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5350", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5326", + "scripts": [], + "own_tokenizer": false }, { "name": "Cangin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Laalaa", "iso_1_code": null, "iso_3_code": "cae", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5355", + "scripts": [], + "own_tokenizer": false }, { "name": "Paloor", "iso_1_code": null, "iso_3_code": "fap", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5356", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndut", "iso_1_code": null, "iso_3_code": "ndv", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5357", + "scripts": [], + "own_tokenizer": false }, { "name": "Saafi-Saafi", "iso_1_code": null, "iso_3_code": "sav", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5358", + "scripts": [], + "own_tokenizer": false }, { "name": "Noon", "iso_1_code": null, "iso_3_code": "snf", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5359", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5354", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Senegal-Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banyun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bainouk-Gunyu\u00f1o", "iso_1_code": null, "iso_3_code": "bab", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5362", + "scripts": [], + "own_tokenizer": false }, { "name": "Bainouk-Samik", "iso_1_code": null, "iso_3_code": "bcb", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5363", + "scripts": [], + "own_tokenizer": false }, { "name": "Bainouk-Gunyaamolo", "iso_1_code": null, "iso_3_code": "bcz", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5361", + "scripts": [], + "own_tokenizer": false }, { "name": "Nun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kasanga", "iso_1_code": null, "iso_3_code": "ccj", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5366", + "scripts": [], + "own_tokenizer": false }, { "name": "Kobiana", "iso_1_code": null, "iso_3_code": "kcj", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5367", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5365", + "scripts": [], + "own_tokenizer": false }, { "name": "Tenda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Biafada", "iso_1_code": null, "iso_3_code": "bif", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5369", + "scripts": [], + "own_tokenizer": false }, { "name": "Oniyan", "iso_1_code": null, "iso_3_code": "bsc", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5370", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wamey", "iso_1_code": null, "iso_3_code": "cou", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5371", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Badyara", "iso_1_code": null, "iso_3_code": "pbp", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5372", + "scripts": [], + "own_tokenizer": false }, { "name": "M\u00e9nik", "iso_1_code": null, "iso_3_code": "tnr", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5373", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5368", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5360", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbulungish-Nalu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbulungish", "iso_1_code": null, "iso_3_code": "mbv", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5375", + "scripts": [], + "own_tokenizer": false }, { "name": "Nalu", "iso_1_code": null, "iso_3_code": "naj", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5376", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5374", + "scripts": [], + "own_tokenizer": false }, { "name": "Senegambian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Fula-Wolof", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Fula", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "East Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fulfulde, Western Niger", "iso_1_code": "ff", "iso_3_code": "fuh", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5381", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fulfulde, Central-Eastern Niger", "iso_1_code": "ff", "iso_3_code": "fuq", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5382", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fulfulde, Nigerian", "iso_1_code": "ff", "iso_3_code": "fuv", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5383", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5380", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fulfulde, Adamawa", "iso_1_code": "ff", "iso_3_code": "fub", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5385", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fulfulde, Bagirmi", "iso_1_code": "ff", "iso_3_code": "fui", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5386", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5384", + "scripts": [], + "own_tokenizer": false }, { "name": "West Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fulfulde, Maasina", "iso_1_code": "ff", "iso_3_code": "ffm", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5388", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fulfulde, Borgu", "iso_1_code": "ff", "iso_3_code": "fue", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5389", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pular", "iso_1_code": "ff", "iso_3_code": "fuf", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5390", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5387", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pulaar", "iso_1_code": "ff", "iso_3_code": "fuc", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5392", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5391", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5379", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolof", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Wolof, Gambian", "iso_1_code": null, "iso_3_code": "wof", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5394", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolof", "iso_1_code": "wo", "iso_3_code": "wol", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "5395", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "5393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5378", + "scripts": [], + "own_tokenizer": false }, { "name": "Serer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Serer-Sine", "iso_1_code": null, "iso_3_code": "srr", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5397", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5377", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5325", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Limba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Limba, West-Central", "iso_1_code": null, "iso_3_code": "lia", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5400", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Limba, East", "iso_1_code": null, "iso_3_code": "lma", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5401", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5399", + "scripts": [], + "own_tokenizer": false }, { "name": "Mel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bullom-Kissi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bullom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bom-Kim", "iso_1_code": null, "iso_3_code": "bmf", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5406", + "scripts": [], + "own_tokenizer": false }, { "name": "Bullom So", "iso_1_code": null, "iso_3_code": "buy", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5407", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5405", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sherbro", "iso_1_code": null, "iso_3_code": "bun", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5409", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5408", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5404", + "scripts": [], + "own_tokenizer": false }, { "name": "Kissi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kissi, Northern", "iso_1_code": null, "iso_3_code": "kqs", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5411", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kisi, Southern", "iso_1_code": null, "iso_3_code": "kss", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5412", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5410", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5403", + "scripts": [], + "own_tokenizer": false }, { "name": "Gola", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gola", "iso_1_code": null, "iso_3_code": "gol", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5414", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5413", + "scripts": [], + "own_tokenizer": false }, { "name": "Temne", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baga Pokur", "iso_1_code": null, "iso_3_code": "bcg", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5417", + "scripts": [], + "own_tokenizer": false }, { "name": "Baga Koga", "iso_1_code": null, "iso_3_code": "bgo", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5418", + "scripts": [], + "own_tokenizer": false }, { "name": "Baga Manduri", "iso_1_code": null, "iso_3_code": "bmd", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5419", + "scripts": [], + "own_tokenizer": false }, { "name": "Baga Kaloum", "iso_1_code": null, "iso_3_code": "bqf", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5420", + "scripts": [], + "own_tokenizer": false }, { "name": "Baga Sitemu", "iso_1_code": null, "iso_3_code": "bsp", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5421", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Baga Soban\u00e9", "iso_1_code": null, "iso_3_code": "bsv", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5422", + "scripts": [], + "own_tokenizer": false }, { "name": "Landoma", "iso_1_code": null, "iso_3_code": "ldm", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5416", + "scripts": [], + "own_tokenizer": false }, { "name": "Temne-Banta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Themne", "iso_1_code": null, "iso_3_code": "tem", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5425", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5424", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5415", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5402", + "scripts": [], + "own_tokenizer": false }, { "name": "Sua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mansoanka", "iso_1_code": null, "iso_3_code": "msw", - "tokenizer": { - "name": "wolof", - "tokenizer": "StanzaTokenizer(\"wo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5427", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5426", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5322", + "scripts": [], + "own_tokenizer": false }, { "name": "Ijoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Defaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Defaka", "iso_1_code": null, "iso_3_code": "afn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5429", + "scripts": [], + "own_tokenizer": false }, { "name": "Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ijo, Southeast", "iso_1_code": null, "iso_3_code": "ijs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5433", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5432", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkoroo", "iso_1_code": null, "iso_3_code": "nkx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5436", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ibani", "iso_1_code": null, "iso_3_code": "iby", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5438", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalabari", "iso_1_code": null, "iso_3_code": "ijn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5439", + "scripts": [], + "own_tokenizer": false }, { "name": "Kirike", "iso_1_code": null, "iso_3_code": "okr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5437", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5434", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Inland Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Biseni", "iso_1_code": null, "iso_3_code": "ije", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5443", + "scripts": [], + "own_tokenizer": false }, { "name": "Okodia", "iso_1_code": null, "iso_3_code": "okd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5444", + "scripts": [], + "own_tokenizer": false }, { "name": "Oruma", "iso_1_code": null, "iso_3_code": "orr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5445", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5442", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5441", + "scripts": [], + "own_tokenizer": false }, { "name": "West Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Izon", "iso_1_code": null, "iso_3_code": "ijc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5447", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5446", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5431", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5428", + "scripts": [], + "own_tokenizer": false }, { "name": "Volta-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Benue-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Akpes", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akpes", "iso_1_code": null, "iso_3_code": "ibe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5451", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5450", + "scripts": [], + "own_tokenizer": false }, { "name": "Bantoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dakoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Samba Daka", "iso_1_code": null, "iso_3_code": "ccg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5455", + "scripts": [], + "own_tokenizer": false }, { "name": "Dirim", "iso_1_code": null, "iso_3_code": "dir", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5456", + "scripts": [], + "own_tokenizer": false }, { "name": "Dong", "iso_1_code": null, "iso_3_code": "doh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5457", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamja-Dengsa-Tola", "iso_1_code": null, "iso_3_code": "ldh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5458", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaa", "iso_1_code": null, "iso_3_code": "ttb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5459", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5454", + "scripts": [], + "own_tokenizer": false }, { "name": "Fam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fam", "iso_1_code": null, "iso_3_code": "fam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5461", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5460", + "scripts": [], + "own_tokenizer": false }, { "name": "Mambiloid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mambila-Konja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwanja", "iso_1_code": null, "iso_3_code": "knp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5465", + "scripts": [], + "own_tokenizer": false }, { "name": "Twendi", "iso_1_code": null, "iso_3_code": "twn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5466", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5464", + "scripts": [], + "own_tokenizer": false }, { "name": "Magu-Kamkam-Kila", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbongno", "iso_1_code": null, "iso_3_code": "bgu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5468", + "scripts": [], + "own_tokenizer": false }, { "name": "Somyev", "iso_1_code": null, "iso_3_code": "kgt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5469", + "scripts": [], + "own_tokenizer": false }, { "name": "Mvanip", "iso_1_code": null, "iso_3_code": "mcj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5470", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndunda", "iso_1_code": null, "iso_3_code": "nuh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5471", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5467", + "scripts": [], + "own_tokenizer": false }, { "name": "Mambila", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mambila, Cameroon", "iso_1_code": null, "iso_3_code": "mcu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5473", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mambila, Nigeria", "iso_1_code": null, "iso_3_code": "mzk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5474", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5472", + "scripts": [], + "own_tokenizer": false }, { "name": "Njerup", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Njerep", "iso_1_code": null, "iso_3_code": "njr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5476", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5475", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5463", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndoola", "iso_1_code": null, "iso_3_code": "ndr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5478", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5477", + "scripts": [], + "own_tokenizer": false }, { "name": "Suga-Vute", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Suga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nizaa", "iso_1_code": null, "iso_3_code": "sgi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5481", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5480", + "scripts": [], + "own_tokenizer": false }, { "name": "Vute", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vute", "iso_1_code": null, "iso_3_code": "vut", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "5483", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wawa", "iso_1_code": null, "iso_3_code": "www", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5484", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5482", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5462", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5453", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Beboid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sari", "iso_1_code": null, "iso_3_code": "asj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5487", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbuk", "iso_1_code": null, "iso_3_code": "bpc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5488", + "scripts": [], + "own_tokenizer": false }, { "name": "Bukwen", "iso_1_code": null, "iso_3_code": "buz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5489", + "scripts": [], + "own_tokenizer": false }, { "name": "Naami", "iso_1_code": null, "iso_3_code": "bzv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5490", + "scripts": [], + "own_tokenizer": false }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "cnq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5491", + "scripts": [], + "own_tokenizer": false }, { "name": "Kemedzung", "iso_1_code": null, "iso_3_code": "dmo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5492", + "scripts": [], + "own_tokenizer": false }, { "name": "Mashi", "iso_1_code": null, "iso_3_code": "jms", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5493", + "scripts": [], + "own_tokenizer": false }, { "name": "Naki", "iso_1_code": null, "iso_3_code": "mff", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5494", + "scripts": [], + "own_tokenizer": false }, { "name": "Nchane", "iso_1_code": null, "iso_3_code": "ncr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5495", + "scripts": [], + "own_tokenizer": false }, { "name": "Noone", "iso_1_code": null, "iso_3_code": "nhu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5496", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5486", + "scripts": [], + "own_tokenizer": false }, { "name": "Ekoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ejagham", "iso_1_code": null, "iso_3_code": "etu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5498", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ndoe", "iso_1_code": null, "iso_3_code": "nbb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5499", + "scripts": [], + "own_tokenizer": false }, { "name": "Bakor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abanyom", "iso_1_code": null, "iso_3_code": "abm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5501", + "scripts": [], + "own_tokenizer": false }, { "name": "Ekajuk", "iso_1_code": null, "iso_3_code": "eka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5502", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nkem-Nkum", "iso_1_code": null, "iso_3_code": "isi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5503", + "scripts": [], + "own_tokenizer": false }, { "name": "Nnam", "iso_1_code": null, "iso_3_code": "nbp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5504", + "scripts": [], + "own_tokenizer": false }, { "name": "Nde-Nsele-Nta", "iso_1_code": null, "iso_3_code": "ndd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5505", + "scripts": [], + "own_tokenizer": false }, { "name": "Efutop", "iso_1_code": null, "iso_3_code": "ofu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5506", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5500", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5497", + "scripts": [], + "own_tokenizer": false }, { "name": "Jarawan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cameroon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nagumi", "iso_1_code": null, "iso_3_code": "ngv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5509", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbonga", "iso_1_code": null, "iso_3_code": "xmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5510", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5508", + "scripts": [], + "own_tokenizer": false }, { "name": "Nigerian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbat", "iso_1_code": null, "iso_3_code": "bau", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5512", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulung", "iso_1_code": null, "iso_3_code": "bbu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5513", + "scripts": [], + "own_tokenizer": false }, { "name": "Bille", "iso_1_code": null, "iso_3_code": "bil", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5514", + "scripts": [], + "own_tokenizer": false }, { "name": "Lame", "iso_1_code": null, "iso_3_code": "bma", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5515", + "scripts": [], + "own_tokenizer": false }, { "name": "Duguri", "iso_1_code": null, "iso_3_code": "dbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5516", + "scripts": [], + "own_tokenizer": false }, { "name": "Dulubu", "iso_1_code": null, "iso_3_code": "dbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5517", + "scripts": [], + "own_tokenizer": false }, { "name": "Shiki", "iso_1_code": null, "iso_3_code": "gua", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5518", + "scripts": [], + "own_tokenizer": false }, { "name": "Gwa", "iso_1_code": null, "iso_3_code": "gwb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5519", + "scripts": [], + "own_tokenizer": false }, { "name": "Gwak", "iso_1_code": null, "iso_3_code": "jgk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5520", + "scripts": [], + "own_tokenizer": false }, { "name": "Bankal", "iso_1_code": null, "iso_3_code": "jjr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5521", + "scripts": [], + "own_tokenizer": false }, { "name": "Labir", "iso_1_code": null, "iso_3_code": "jku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5522", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbula-Bwazza", "iso_1_code": null, "iso_3_code": "mbu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5523", + "scripts": [], + "own_tokenizer": false }, { "name": "Mama", "iso_1_code": null, "iso_3_code": "mma", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5524", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5511", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5507", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamfe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Denya", "iso_1_code": null, "iso_3_code": "anv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5526", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kenyang", "iso_1_code": null, "iso_3_code": "ken", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5527", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kendem", "iso_1_code": null, "iso_3_code": "kvm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5528", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5525", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbe", "iso_1_code": null, "iso_3_code": "mfo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5530", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5529", + "scripts": [], + "own_tokenizer": false }, { "name": "Narrow Bantu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "D", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bembe-Buyi (D.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bembe", "iso_1_code": null, "iso_3_code": "bmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5535", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5534", + "scripts": [], + "own_tokenizer": false }, { "name": "Bembe-Buyi (D.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buyu", "iso_1_code": null, "iso_3_code": "byi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5537", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5536", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kari", "iso_1_code": null, "iso_3_code": "kbj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5539", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5538", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boguru", "iso_1_code": null, "iso_3_code": "bqu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5541", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5540", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.303)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngbinda", "iso_1_code": null, "iso_3_code": "nbd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5543", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5542", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.304)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Homa", "iso_1_code": null, "iso_3_code": "hom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5545", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5544", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.305)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyanga-li", "iso_1_code": null, "iso_3_code": "nyc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5547", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5546", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.307)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mayeka", "iso_1_code": null, "iso_3_code": "myc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5549", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5548", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.308)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bodo", "iso_1_code": null, "iso_3_code": "boy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5551", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5550", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bhele", "iso_1_code": null, "iso_3_code": "bhy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5553", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5552", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bila", "iso_1_code": null, "iso_3_code": "bip", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5555", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5554", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kaiku", "iso_1_code": null, "iso_3_code": "kkq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5557", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5556", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bira", "iso_1_code": null, "iso_3_code": "brf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5559", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5558", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyali", "iso_1_code": null, "iso_3_code": "nlj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5560", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.331)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vanuma", "iso_1_code": null, "iso_3_code": "vau", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5563", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5562", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.332)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Budu", "iso_1_code": null, "iso_3_code": "buu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5565", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5564", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.333)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndaka", "iso_1_code": null, "iso_3_code": "ndk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5567", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5566", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.334)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbo", "iso_1_code": null, "iso_3_code": "zmw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5569", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5568", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.335)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Beeke", "iso_1_code": null, "iso_3_code": "bkf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5571", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5570", + "scripts": [], + "own_tokenizer": false }, { "name": "Bira-Nyali (D.336)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngbee", "iso_1_code": null, "iso_3_code": "jgb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5573", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5572", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lika", "iso_1_code": null, "iso_3_code": "lik", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5575", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5574", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "bcp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5577", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5576", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.211)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kango", "iso_1_code": null, "iso_3_code": "kzy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5579", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5578", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Amba", "iso_1_code": null, "iso_3_code": "rwm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5581", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5580", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Komo", "iso_1_code": null, "iso_3_code": "kmw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5583", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5582", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Songoora", "iso_1_code": null, "iso_3_code": "sod", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5585", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5584", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lega-Mwenga", "iso_1_code": null, "iso_3_code": "lgm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5587", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5586", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lega-Shabunda", "iso_1_code": null, "iso_3_code": "lea", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5589", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5588", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kanu", "iso_1_code": null, "iso_3_code": "khx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5591", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwami", "iso_1_code": null, "iso_3_code": "ktf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5592", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5590", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.26)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zimba", "iso_1_code": null, "iso_3_code": "zmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5594", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5593", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangubangu", "iso_1_code": null, "iso_3_code": "bnx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5595", + "scripts": [], + "own_tokenizer": false }, { "name": "Lega-Holoholo (D.28)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Holoholo", "iso_1_code": null, "iso_3_code": "hoo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5598", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5597", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbole-Enya (D.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbole", "iso_1_code": null, "iso_3_code": "mdq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5600", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5599", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbole-Enya (D.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lengola", "iso_1_code": null, "iso_3_code": "lej", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5602", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5601", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbole-Enya (D.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mituku", "iso_1_code": null, "iso_3_code": "zmq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5604", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5603", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbole-Enya (D.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Enya", "iso_1_code": null, "iso_3_code": "gey", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5606", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5605", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbolle-Enya (D.141)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zula", "iso_1_code": null, "iso_3_code": "zla", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5608", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5607", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyanga (D.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyanga", "iso_1_code": null, "iso_3_code": "nyj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5610", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5609", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5533", + "scripts": [], + "own_tokenizer": false }, { "name": "E", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chaga (E.621)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Machame", "iso_1_code": null, "iso_3_code": "jmc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5613", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Rwa", "iso_1_code": null, "iso_3_code": "rwk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5614", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5612", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaga (E.622)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mochi", "iso_1_code": null, "iso_3_code": "old", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5616", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vunjo", "iso_1_code": null, "iso_3_code": "vun", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5617", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5615", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaga (E.623)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rombo", "iso_1_code": null, "iso_3_code": "rof", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5619", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5618", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaga (E.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kahe", "iso_1_code": null, "iso_3_code": "hka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5621", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5620", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaga (E.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gweno", "iso_1_code": null, "iso_3_code": "gwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5623", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5622", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gikuyu", "iso_1_code": "ki", "iso_3_code": "kik", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5625", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5624", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kiembu", "iso_1_code": null, "iso_3_code": "ebu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5627", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5626", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kim\u00ee\u00eeru", "iso_1_code": null, "iso_3_code": "mer", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5629", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5628", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.531)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mwimbi-Muthambi", "iso_1_code": null, "iso_3_code": "mws", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5631", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5630", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kitharaka", "iso_1_code": null, "iso_3_code": "thk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5633", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5632", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.541)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gichuka", "iso_1_code": null, "iso_3_code": "cuh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5635", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5634", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kamba", "iso_1_code": null, "iso_3_code": "kam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5637", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5636", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikuyu-Kamba (E.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dhaiso", "iso_1_code": null, "iso_3_code": "dhs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5639", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5638", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.701)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kiwilwana", "iso_1_code": null, "iso_3_code": "mlk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5641", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5640", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kipfokomu", "iso_1_code": null, "iso_3_code": "pkb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5643", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5642", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chichonyi-Chidzihana-Chikauma", "iso_1_code": null, "iso_3_code": "coh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5645", + "scripts": [], + "own_tokenizer": false }, { "name": "Chiduruma", "iso_1_code": null, "iso_3_code": "dug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5646", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kigiryama", "iso_1_code": null, "iso_3_code": "nyf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5647", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5644", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chidigo", "iso_1_code": null, "iso_3_code": "dig", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5649", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5648", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.731)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Segeju", "iso_1_code": null, "iso_3_code": "seg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5651", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5650", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dawida", "iso_1_code": null, "iso_3_code": "dav", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5653", + "scripts": [], + "own_tokenizer": false }, { "name": "Taveta", "iso_1_code": null, "iso_3_code": "tvs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5654", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5652", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika-Taita (E.741)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sagalla", "iso_1_code": null, "iso_3_code": "tga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5656", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5655", + "scripts": [], + "own_tokenizer": false }, { "name": "Temi (E.46)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Temi", "iso_1_code": null, "iso_3_code": "soz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5658", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5657", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5611", + "scripts": [], + "own_tokenizer": false }, { "name": "F", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ruwila", "iso_1_code": null, "iso_3_code": "rwl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5660", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilamba-Rangi (F.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Isanzu", "iso_1_code": null, "iso_3_code": "isn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5662", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilamba", "iso_1_code": null, "iso_3_code": "nim", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5663", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5661", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilamba-Rangi (F.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyaturu", "iso_1_code": null, "iso_3_code": "rim", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5665", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5664", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilamba-Rangi (F.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rangi", "iso_1_code": null, "iso_3_code": "lag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5667", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5666", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilamba-Rangi (F.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbugwe", "iso_1_code": null, "iso_3_code": "mgz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5669", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5668", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukuma-Nyamwezi (F.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sukuma", "iso_1_code": null, "iso_3_code": "suk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5671", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5670", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukuma-Nyamwezi (F.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konongo", "iso_1_code": null, "iso_3_code": "kcz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5673", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyamwezi", "iso_1_code": null, "iso_3_code": "nym", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5674", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5672", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukuma-Nyamwezi (F.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sumbwa", "iso_1_code": null, "iso_3_code": "suw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5676", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5675", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukuma-Nyamwezi (F.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kimbu", "iso_1_code": null, "iso_3_code": "kiv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5678", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5677", + "scripts": [], + "own_tokenizer": false }, { "name": "Sukuma-Nyamwezi (F.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bungu", "iso_1_code": null, "iso_3_code": "wun", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5680", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5679", + "scripts": [], + "own_tokenizer": false }, { "name": "Tongwe-Bende (F.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tongwe", "iso_1_code": null, "iso_3_code": "tny", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5682", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5681", + "scripts": [], + "own_tokenizer": false }, { "name": "Tongwe-Bende (F.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bende", "iso_1_code": null, "iso_3_code": "bdp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5684", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5683", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5659", + "scripts": [], + "own_tokenizer": false }, { "name": "G", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bena-Kinga (G.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sangu", "iso_1_code": null, "iso_3_code": "sbp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5687", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5686", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hehe", "iso_1_code": null, "iso_3_code": "heh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5689", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5688", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bena", "iso_1_code": null, "iso_3_code": "bez", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5691", + "scripts": [], + "own_tokenizer": false }, { "name": "Benamanga", "iso_1_code": null, "iso_3_code": "egm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5692", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5690", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pangwa", "iso_1_code": null, "iso_3_code": "pbr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5694", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5693", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kinga", "iso_1_code": null, "iso_3_code": "zga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5696", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5695", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.651)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Magoma", "iso_1_code": null, "iso_3_code": "gmx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5698", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5697", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.66)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vwanji", "iso_1_code": null, "iso_3_code": "wbi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5700", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5699", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena-Kinga (G.67)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kisi", "iso_1_code": null, "iso_3_code": "kiz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5702", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5701", + "scripts": [], + "own_tokenizer": false }, { "name": "Gogo-Kagulu (G.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gogo", "iso_1_code": null, "iso_3_code": "gog", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5704", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5703", + "scripts": [], + "own_tokenizer": false }, { "name": "Gogo-Kagulu (G.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kagulu", "iso_1_code": null, "iso_3_code": "kki", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5706", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5705", + "scripts": [], + "own_tokenizer": false }, { "name": "Pogolo-Ndamba (G.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pogolo", "iso_1_code": null, "iso_3_code": "poy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5708", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5707", + "scripts": [], + "own_tokenizer": false }, { "name": "Pogolo-Ndamba (G.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndamba", "iso_1_code": null, "iso_3_code": "ndj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5710", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5709", + "scripts": [], + "own_tokenizer": false }, { "name": "Shambala (G.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Asu", "iso_1_code": null, "iso_3_code": "asa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5712", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5711", + "scripts": [], + "own_tokenizer": false }, { "name": "Shambala (G.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shambala", "iso_1_code": null, "iso_3_code": "ksb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5714", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5713", + "scripts": [], + "own_tokenizer": false }, { "name": "Shambala (G.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bondei", "iso_1_code": null, "iso_3_code": "bou", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5716", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5715", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili (G.40)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Swahili, Congo", "iso_1_code": "sw", "iso_3_code": "swc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5718", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5717", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili (G.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Makwe", "iso_1_code": null, "iso_3_code": "ymk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5720", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5719", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili (G.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mwani", "iso_1_code": null, "iso_3_code": "wmw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5722", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5721", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili (G.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Swahili", "iso_1_code": "sw", "iso_3_code": "swh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5724", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5723", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili (G.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Comorian, Maore", "iso_1_code": null, "iso_3_code": "swb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5726", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Comorian, Mwali", "iso_1_code": null, "iso_3_code": "wlc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5727", + "scripts": [], + "own_tokenizer": false }, { "name": "Comorian, Ndzwani", "iso_1_code": null, "iso_3_code": "wni", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5728", + "scripts": [], + "own_tokenizer": false }, { "name": "Comorian, Ngazidja", "iso_1_code": null, "iso_3_code": "zdj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5729", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5725", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Doe", "iso_1_code": null, "iso_3_code": "doe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5731", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5730", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zigula", "iso_1_code": null, "iso_3_code": "ziw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5733", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5732", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mushungulu", "iso_1_code": null, "iso_3_code": "xma", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5735", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5734", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwere", "iso_1_code": null, "iso_3_code": "cwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5737", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5736", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zaramo", "iso_1_code": null, "iso_3_code": "zaj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5739", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5738", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngulu", "iso_1_code": null, "iso_3_code": "ngp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5741", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5740", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luguru", "iso_1_code": null, "iso_3_code": "ruf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5743", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5742", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.36)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kami", "iso_1_code": null, "iso_3_code": "kcu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5745", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5744", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.37)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kutu", "iso_1_code": null, "iso_3_code": "kdc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5747", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5746", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.38)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vidunda", "iso_1_code": null, "iso_3_code": "vid", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5749", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5748", + "scripts": [], + "own_tokenizer": false }, { "name": "Zigula-Zaramo (G.39)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sagala", "iso_1_code": null, "iso_3_code": "sbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5751", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5750", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5685", + "scripts": [], + "own_tokenizer": false }, { "name": "H", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kikongo (H.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Beembe", "iso_1_code": null, "iso_3_code": "beq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5754", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5753", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikongo (H.112)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Doondo", "iso_1_code": null, "iso_3_code": "dde", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5756", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaamba", "iso_1_code": null, "iso_3_code": "xku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5757", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5755", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikongo (H.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vili", "iso_1_code": null, "iso_3_code": "vif", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5759", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5758", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikongo (H.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kunyi", "iso_1_code": null, "iso_3_code": "njx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5761", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5760", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikongo (H.131)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Suundi", "iso_1_code": null, "iso_3_code": "sdj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5763", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5762", + "scripts": [], + "own_tokenizer": false }, { "name": "Kikongo (H.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koongo", "iso_1_code": "kg", "iso_3_code": "kng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5765", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kikongo", "iso_1_code": "kg", "iso_3_code": "kwy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5766", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Laari", "iso_1_code": "kg", "iso_3_code": "ldi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5767", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kiyombe", "iso_1_code": null, "iso_3_code": "yom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5768", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5764", + "scripts": [], + "own_tokenizer": false }, { "name": "Kimbundu (H.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kimbundu", "iso_1_code": null, "iso_3_code": "kmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5770", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mpinda", "iso_1_code": null, "iso_3_code": "pnd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5771", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5769", + "scripts": [], + "own_tokenizer": false }, { "name": "Kimbundu (H.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kibala", "iso_1_code": null, "iso_3_code": "blv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5773", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5772", + "scripts": [], + "own_tokenizer": false }, { "name": "Kimbundu (H.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Songo", "iso_1_code": null, "iso_3_code": "nsx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5775", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5774", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbala-Hunganna (H.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbala", "iso_1_code": null, "iso_3_code": "mdp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5777", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5776", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbala-Hunganna (H.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hungana", "iso_1_code": null, "iso_3_code": "hum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5779", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5778", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaka (H.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lonzo", "iso_1_code": null, "iso_3_code": "lnz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5781", + "scripts": [], + "own_tokenizer": false }, { "name": "Pelende", "iso_1_code": null, "iso_3_code": "ppp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5782", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaka", "iso_1_code": null, "iso_3_code": "yaf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5783", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5780", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaka (H.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hungu", "iso_1_code": null, "iso_3_code": "hng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5785", + "scripts": [], + "own_tokenizer": false }, { "name": "Suku", "iso_1_code": null, "iso_3_code": "sub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5786", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5784", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaka (H.321)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sonde", "iso_1_code": null, "iso_3_code": "shc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5788", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5787", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaka (H.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbangala", "iso_1_code": null, "iso_3_code": "mxg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5790", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5789", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5752", + "scripts": [], + "own_tokenizer": false }, { "name": "J", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Haya-Jita (E.20)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Subi", "iso_1_code": null, "iso_3_code": "xsj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5793", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5792", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyambo", "iso_1_code": null, "iso_3_code": "now", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5795", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5794", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Haya", "iso_1_code": null, "iso_3_code": "hay", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5797", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5796", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zinza", "iso_1_code": null, "iso_3_code": "zin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5799", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5798", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kerewe", "iso_1_code": null, "iso_3_code": "ked", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5801", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5800", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jita", "iso_1_code": null, "iso_3_code": "jit", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5803", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5802", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwaya", "iso_1_code": null, "iso_3_code": "kya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5805", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5804", + "scripts": [], + "own_tokenizer": false }, { "name": "Haya-Jita (E.252)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kara", "iso_1_code": null, "iso_3_code": "reg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5807", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5806", + "scripts": [], + "own_tokenizer": false }, { "name": "Konzo-Ndandi (D.40)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kobo", "iso_1_code": null, "iso_3_code": "okc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5809", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5808", + "scripts": [], + "own_tokenizer": false }, { "name": "Konzo-Ndandi (D.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konzo", "iso_1_code": null, "iso_3_code": "koo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5811", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5810", + "scripts": [], + "own_tokenizer": false }, { "name": "Konzo-Ndandi (D.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nande", "iso_1_code": null, "iso_3_code": "nnb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5813", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5812", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngoreme", "iso_1_code": null, "iso_3_code": "ngq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5815", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5814", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ikizu", "iso_1_code": null, "iso_3_code": "ikz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5817", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5816", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Suba", "iso_1_code": null, "iso_3_code": "sxb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5819", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5818", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.405)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kabwa", "iso_1_code": null, "iso_3_code": "cwa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5821", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5820", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.406)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Singa", "iso_1_code": null, "iso_3_code": "sgm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5823", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5822", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lulogooli", "iso_1_code": null, "iso_3_code": "rag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5825", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5824", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.411)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luidakho-Luisukha-Lutirichi", "iso_1_code": null, "iso_3_code": "ida", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5827", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5826", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ekegusii", "iso_1_code": null, "iso_3_code": "guz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5829", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5828", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuria", "iso_1_code": null, "iso_3_code": "kuj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5831", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5830", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.431)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Suba-Simbiti", "iso_1_code": null, "iso_3_code": "ssc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5833", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5832", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zanaki", "iso_1_code": null, "iso_3_code": "zak", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5835", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5834", + "scripts": [], + "own_tokenizer": false }, { "name": "Logooli-Kuria (E.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ikoma-Nata-Isenye", "iso_1_code": null, "iso_3_code": "ntk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5837", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5836", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bukusu", "iso_1_code": null, "iso_3_code": "bxk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5839", + "scripts": [], + "own_tokenizer": false }, { "name": "Lutachoni", "iso_1_code": null, "iso_3_code": "lts", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5840", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaaba", "iso_1_code": null, "iso_3_code": "myx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5841", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5838", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lukabaras", "iso_1_code": null, "iso_3_code": "lkb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5843", + "scripts": [], + "own_tokenizer": false }, { "name": "Olushisa", "iso_1_code": null, "iso_3_code": "lks", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5844", + "scripts": [], + "own_tokenizer": false }, { "name": "Olumarama", "iso_1_code": null, "iso_3_code": "lrm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5845", + "scripts": [], + "own_tokenizer": false }, { "name": "Olutsotso", "iso_1_code": null, "iso_3_code": "lto", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5846", + "scripts": [], + "own_tokenizer": false }, { "name": "Oluwanga", "iso_1_code": null, "iso_3_code": "lwg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5847", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nyala", "iso_1_code": null, "iso_3_code": "nle", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5848", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5842", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Olunyole", "iso_1_code": null, "iso_3_code": "nyd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5850", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5849", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Saamya-Gwe", "iso_1_code": null, "iso_3_code": "lsm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5852", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5851", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.341)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Olukhayo", "iso_1_code": null, "iso_3_code": "lko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5854", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5853", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.342)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Olumarachi", "iso_1_code": null, "iso_3_code": "lri", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5856", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5855", + "scripts": [], + "own_tokenizer": false }, { "name": "Masaba-Luhya (E.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyole", "iso_1_code": null, "iso_3_code": "nuj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5858", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5857", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gungu", "iso_1_code": null, "iso_3_code": "rub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5860", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5859", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.102)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Talinga-Bwisi", "iso_1_code": null, "iso_3_code": "tlj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5862", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5861", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.103)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ruruuli-Runyala", "iso_1_code": null, "iso_3_code": "ruc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5864", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5863", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyoro", "iso_1_code": null, "iso_3_code": "nyo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5866", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5865", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tooro", "iso_1_code": null, "iso_3_code": "ttj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5868", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5867", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.121)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hema", "iso_1_code": null, "iso_3_code": "nix", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5870", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5869", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyankore", "iso_1_code": null, "iso_3_code": "nyn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5872", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5871", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chiga", "iso_1_code": null, "iso_3_code": "cgg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5874", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5873", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ganda", "iso_1_code": "lg", "iso_3_code": "lug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "5876", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "5875", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kenye", "iso_1_code": null, "iso_3_code": "lke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5878", + "scripts": [], + "own_tokenizer": false }, { "name": "Soga", "iso_1_code": null, "iso_3_code": "xog", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5879", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5877", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyoro-Ganda (E.17)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gwere", "iso_1_code": null, "iso_3_code": "gwr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5881", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5880", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kinyarwanda", "iso_1_code": "rw", "iso_3_code": "kin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5883", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5882", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rundi", "iso_1_code": "rn", "iso_3_code": "run", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5885", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5884", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fuliiru", "iso_1_code": null, "iso_3_code": "flr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5887", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5886", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.631)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kiviila", "iso_1_code": null, "iso_3_code": "job", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5889", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5888", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shubi", "iso_1_code": null, "iso_3_code": "suj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5891", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5890", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hangaza", "iso_1_code": null, "iso_3_code": "han", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5893", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5892", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.66)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ha", "iso_1_code": null, "iso_3_code": "haq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5895", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5894", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruanda-Rundi (D.67)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5896", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyindu", "iso_1_code": null, "iso_3_code": "nyg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5898", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5897", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hunde", "iso_1_code": null, "iso_3_code": "hke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5900", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5899", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Havu", "iso_1_code": null, "iso_3_code": "hav", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5902", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5901", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shi", "iso_1_code": null, "iso_3_code": "shr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5904", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5903", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.531)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tembo", "iso_1_code": null, "iso_3_code": "tbt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5906", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5905", + "scripts": [], + "own_tokenizer": false }, { "name": "Shi-Hunde (D.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kabwari", "iso_1_code": null, "iso_3_code": "kcw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5908", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5907", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5791", + "scripts": [], + "own_tokenizer": false }, { "name": "K", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ciokwe-Luchazi (K.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chokwe", "iso_1_code": null, "iso_3_code": "cjk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5911", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5910", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luimbi", "iso_1_code": null, "iso_3_code": "lum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5913", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyemba", "iso_1_code": null, "iso_3_code": "nba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5914", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5912", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luchazi", "iso_1_code": null, "iso_3_code": "lch", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5916", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5915", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luvale", "iso_1_code": null, "iso_3_code": "lue", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5918", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5917", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbunda", "iso_1_code": null, "iso_3_code": "mck", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5920", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5919", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyengo", "iso_1_code": null, "iso_3_code": "nye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5922", + "scripts": [], + "own_tokenizer": false }, { "name": "Yauma", "iso_1_code": null, "iso_3_code": "yax", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5923", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5921", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.17)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbwela", "iso_1_code": null, "iso_3_code": "mfu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5925", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5924", + "scripts": [], + "own_tokenizer": false }, { "name": "Ciokwe-Luchazi (K.18)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkangala", "iso_1_code": null, "iso_3_code": "nkn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5927", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5926", + "scripts": [], + "own_tokenizer": false }, { "name": "Lozi (K.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lozi", "iso_1_code": null, "iso_3_code": "loz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5929", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5928", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luyana", "iso_1_code": null, "iso_3_code": "lyn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5931", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5930", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbowe", "iso_1_code": null, "iso_3_code": "mxo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5933", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5932", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwangali", "iso_1_code": null, "iso_3_code": "kwn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5935", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5934", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.332)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gciriku", "iso_1_code": null, "iso_3_code": "diu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5937", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5936", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.333)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbukushu", "iso_1_code": null, "iso_3_code": "mhw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5939", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5938", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mashi", "iso_1_code": null, "iso_3_code": "mho", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5941", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5940", + "scripts": [], + "own_tokenizer": false }, { "name": "Luyana (K.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Simaa", "iso_1_code": null, "iso_3_code": "sie", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5943", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5942", + "scripts": [], + "own_tokenizer": false }, { "name": "Subiya-Totela (K.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fwe", "iso_1_code": null, "iso_3_code": "fwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5945", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5944", + "scripts": [], + "own_tokenizer": false }, { "name": "Subiya-Totela (K.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Totela", "iso_1_code": null, "iso_3_code": "ttl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5947", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5946", + "scripts": [], + "own_tokenizer": false }, { "name": "Subiya-Totela (K.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuhane", "iso_1_code": null, "iso_3_code": "sbs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "5949", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "5948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5909", + "scripts": [], + "own_tokenizer": false }, { "name": "L", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kaonde (L.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kaonde", "iso_1_code": null, "iso_3_code": "kqn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5952", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5951", + "scripts": [], + "own_tokenizer": false }, { "name": "Luba (L.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luba-Kasai", "iso_1_code": null, "iso_3_code": "lua", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5954", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5953", + "scripts": [], + "own_tokenizer": false }, { "name": "Luba (L.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kanyok", "iso_1_code": null, "iso_3_code": "kny", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5956", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5955", + "scripts": [], + "own_tokenizer": false }, { "name": "Luba (L.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luba-Katanga", "iso_1_code": "lu", "iso_3_code": "lub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5958", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5957", + "scripts": [], + "own_tokenizer": false }, { "name": "Luba (L.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hemba", "iso_1_code": null, "iso_3_code": "hem", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5959", + "scripts": [], + "own_tokenizer": false }, { "name": "Luba (L.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sanga", "iso_1_code": null, "iso_3_code": "sng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5962", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5961", + "scripts": [], + "own_tokenizer": false }, { "name": "Lunda (L.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Salampasu", "iso_1_code": null, "iso_3_code": "slx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5964", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5963", + "scripts": [], + "own_tokenizer": false }, { "name": "Lunda (L.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lunda", "iso_1_code": null, "iso_3_code": "lun", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5966", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5965", + "scripts": [], + "own_tokenizer": false }, { "name": "Lunda (L.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ruund", "iso_1_code": null, "iso_3_code": "rnd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5968", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5967", + "scripts": [], + "own_tokenizer": false }, { "name": "Nkoya (L.60)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkoya", "iso_1_code": null, "iso_3_code": "nka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5970", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5969", + "scripts": [], + "own_tokenizer": false }, { "name": "Pende (L.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Phende", "iso_1_code": null, "iso_3_code": "pem", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5972", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5971", + "scripts": [], + "own_tokenizer": false }, { "name": "Pende (L.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Holu", "iso_1_code": null, "iso_3_code": "hol", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5974", + "scripts": [], + "own_tokenizer": false }, { "name": "Samba", "iso_1_code": null, "iso_3_code": "smx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5975", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5973", + "scripts": [], + "own_tokenizer": false }, { "name": "Pende (L.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwese", "iso_1_code": null, "iso_3_code": "kws", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5977", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5976", + "scripts": [], + "own_tokenizer": false }, { "name": "Songe (L.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kete", "iso_1_code": null, "iso_3_code": "kcv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5979", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5978", + "scripts": [], + "own_tokenizer": false }, { "name": "Songe (L.221)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lwalu", "iso_1_code": null, "iso_3_code": "lwa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5981", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5980", + "scripts": [], + "own_tokenizer": false }, { "name": "Songe (L.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Songe", "iso_1_code": null, "iso_3_code": "sop", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5983", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5982", + "scripts": [], + "own_tokenizer": false }, { "name": "Songe (L.231)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bindji", "iso_1_code": null, "iso_3_code": "bpj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5985", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5984", + "scripts": [], + "own_tokenizer": false }, { "name": "Songe (L.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Luna", "iso_1_code": null, "iso_3_code": "luj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "5987", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "5986", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5950", + "scripts": [], + "own_tokenizer": false }, { "name": "M", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bemba (M.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bwile", "iso_1_code": null, "iso_3_code": "bwc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5990", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5989", + "scripts": [], + "own_tokenizer": false }, { "name": "Bemba (M.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aushi", "iso_1_code": null, "iso_3_code": "auh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5991", + "scripts": [], + "own_tokenizer": false }, { "name": "Bemba (M.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Taabwa", "iso_1_code": null, "iso_3_code": "tap", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5994", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5993", + "scripts": [], + "own_tokenizer": false }, { "name": "Bemba (M.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bemba", "iso_1_code": null, "iso_3_code": "bem", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "5996", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "5995", + "scripts": [], + "own_tokenizer": false }, { "name": "Fipa-Mambwe (M.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pimbwe", "iso_1_code": null, "iso_3_code": "piw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "5998", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5997", + "scripts": [], + "own_tokenizer": false }, { "name": "Fipa-Mambwe (M.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rungwa", "iso_1_code": null, "iso_3_code": "rnw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6000", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5999", + "scripts": [], + "own_tokenizer": false }, { "name": "Fipa-Mambwe (M.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fipa", "iso_1_code": null, "iso_3_code": "fip", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6002", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6001", + "scripts": [], + "own_tokenizer": false }, { "name": "Fipa-Mambwe (M.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mambwe-Lungu", "iso_1_code": null, "iso_3_code": "mgr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6004", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6003", + "scripts": [], + "own_tokenizer": false }, { "name": "Lala-Bisa-Lamba (M.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lala-Bisa", "iso_1_code": null, "iso_3_code": "leb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6006", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6005", + "scripts": [], + "own_tokenizer": false }, { "name": "Lala-Bisa-Lamba (M.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lamba", "iso_1_code": null, "iso_3_code": "lam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6008", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6007", + "scripts": [], + "own_tokenizer": false }, { "name": "Lala-Bisa-Lamba (M.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Seba", "iso_1_code": null, "iso_3_code": "kdg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6010", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6009", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenje-Tonga (M.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lenje", "iso_1_code": null, "iso_3_code": "leh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6012", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6011", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenje-Tonga (M.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Soli", "iso_1_code": null, "iso_3_code": "sby", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6014", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6013", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenje-Tonga (M.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ila", "iso_1_code": null, "iso_3_code": "ilb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6016", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6015", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenje-Tonga (M.631)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sala", "iso_1_code": null, "iso_3_code": "shq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6018", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6017", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenje-Tonga (M.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dombe", "iso_1_code": null, "iso_3_code": "dov", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6020", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": "toi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6021", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6019", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyakyusa-Ngonde (M.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndali", "iso_1_code": null, "iso_3_code": "ndh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6023", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6022", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyakyusa-Ngonde (M.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyakyusa-Ngonde", "iso_1_code": null, "iso_3_code": "nyy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6025", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6024", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lambya", "iso_1_code": null, "iso_3_code": "lai", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6027", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6026", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wanda", "iso_1_code": null, "iso_3_code": "wbh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6029", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6028", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyamwanga", "iso_1_code": null, "iso_3_code": "mwn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6031", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6030", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyiha, Tanzania", "iso_1_code": null, "iso_3_code": "nih", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6033", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika, Tanzania", "iso_1_code": null, "iso_3_code": "nkt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6034", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyika", "iso_1_code": null, "iso_3_code": "nkv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6035", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha, Malawi", "iso_1_code": null, "iso_3_code": "nyr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6036", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6032", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Malila", "iso_1_code": null, "iso_3_code": "mgq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6037", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyiha-Safwa (M.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Safwa", "iso_1_code": null, "iso_3_code": "sbk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6040", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6039", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5988", + "scripts": [], + "own_tokenizer": false }, { "name": "N", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chewa-Nyanja (N.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chichewa", "iso_1_code": "ny", "iso_3_code": "nya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6043", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6042", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndendeule", "iso_1_code": null, "iso_3_code": "dne", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6045", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndwewe", "iso_1_code": null, "iso_3_code": "nww", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6046", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6044", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.102)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nindi", "iso_1_code": null, "iso_3_code": "nxi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6048", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6047", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manda", "iso_1_code": null, "iso_3_code": "mgs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6050", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6049", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chingoni", "iso_1_code": null, "iso_3_code": "xnj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6052", + "scripts": [], + "own_tokenizer": false }, { "name": "Xingoni", "iso_1_code": null, "iso_3_code": "xnq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6053", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6051", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Matengo", "iso_1_code": null, "iso_3_code": "mgv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6055", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6054", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mpoto", "iso_1_code": null, "iso_3_code": "mpa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6057", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6056", + "scripts": [], + "own_tokenizer": false }, { "name": "Manda (N.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tonga", "iso_1_code": null, "iso_3_code": "tog", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6059", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6058", + "scripts": [], + "own_tokenizer": false }, { "name": "Senga-Sena (N.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nsenga", "iso_1_code": null, "iso_3_code": "nse", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6061", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Phimbi", "iso_1_code": null, "iso_3_code": "phm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6062", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6060", + "scripts": [], + "own_tokenizer": false }, { "name": "Senga-Sena (N.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kunda", "iso_1_code": null, "iso_3_code": "kdn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6064", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6063", + "scripts": [], + "own_tokenizer": false }, { "name": "Senga-Sena (N.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyungwe", "iso_1_code": null, "iso_3_code": "nyu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6066", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6065", + "scripts": [], + "own_tokenizer": false }, { "name": "Senga-Sena (N.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barwe", "iso_1_code": null, "iso_3_code": "bwg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6068", + "scripts": [], + "own_tokenizer": false }, { "name": "Sena", "iso_1_code": null, "iso_3_code": "seh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6069", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6067", + "scripts": [], + "own_tokenizer": false }, { "name": "Senga-Sena (N.441)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sena, Malawi", "iso_1_code": null, "iso_3_code": "swk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6071", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6070", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumbuka (N.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mwera", "iso_1_code": null, "iso_3_code": "mjh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6073", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6072", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumbuka (N.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tumbuka", "iso_1_code": null, "iso_3_code": "tum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "6075", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "6074", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6041", + "scripts": [], + "own_tokenizer": false }, { "name": "P", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Makhuwa (P.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kokola", "iso_1_code": null, "iso_3_code": "kzn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6078", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lolo", "iso_1_code": null, "iso_3_code": "llb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6079", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Makhuwa-Meetto", "iso_1_code": null, "iso_3_code": "mgh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6080", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manyawa", "iso_1_code": null, "iso_3_code": "mny", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6081", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Takwane", "iso_1_code": null, "iso_3_code": "tke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6082", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Makhuwa-Shirima", "iso_1_code": null, "iso_3_code": "vmk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6083", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Marenje", "iso_1_code": null, "iso_3_code": "vmr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6084", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa", "iso_1_code": null, "iso_3_code": "vmw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6085", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Makhuwa-Marrevone", "iso_1_code": null, "iso_3_code": "xmc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6086", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa-Saka", "iso_1_code": null, "iso_3_code": "xsq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6087", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6077", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koti", "iso_1_code": null, "iso_3_code": "eko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6089", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6088", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nathembo", "iso_1_code": null, "iso_3_code": "nte", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6091", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6090", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lomwe", "iso_1_code": null, "iso_3_code": "ngl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6093", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6092", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.331)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lomwe, Malawi", "iso_1_code": null, "iso_3_code": "lon", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6095", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6094", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chuwabu", "iso_1_code": null, "iso_3_code": "chw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6097", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maindo", "iso_1_code": null, "iso_3_code": "cwb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6098", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6096", + "scripts": [], + "own_tokenizer": false }, { "name": "Makhuwa (P.341)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Makhuwa-Moniga", "iso_1_code": null, "iso_3_code": "mhm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6100", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6099", + "scripts": [], + "own_tokenizer": false }, { "name": "Matuumbi (P.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndengereko", "iso_1_code": null, "iso_3_code": "ndg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6102", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6101", + "scripts": [], + "own_tokenizer": false }, { "name": "Matuumbi (P.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Rufiji", "iso_1_code": null, "iso_3_code": "rui", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6104", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6103", + "scripts": [], + "own_tokenizer": false }, { "name": "Matuumbi (P.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Matumbi", "iso_1_code": null, "iso_3_code": "mgw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6106", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6105", + "scripts": [], + "own_tokenizer": false }, { "name": "Matuumbi (P.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngindo", "iso_1_code": null, "iso_3_code": "nnq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6108", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6107", + "scripts": [], + "own_tokenizer": false }, { "name": "Matuumbi (P.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbunga", "iso_1_code": null, "iso_3_code": "mgy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6110", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6109", + "scripts": [], + "own_tokenizer": false }, { "name": "Yao (P.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yao", "iso_1_code": null, "iso_3_code": "yao", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6112", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6111", + "scripts": [], + "own_tokenizer": false }, { "name": "Yao (P.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mwera", "iso_1_code": null, "iso_3_code": "mwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6114", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6113", + "scripts": [], + "own_tokenizer": false }, { "name": "Yao (P.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Makonde", "iso_1_code": null, "iso_3_code": "kde", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6116", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Machinga", "iso_1_code": null, "iso_3_code": "mvw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6117", + "scripts": [], + "own_tokenizer": false }, { "name": "Matambwe", "iso_1_code": null, "iso_3_code": "wtb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6118", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6115", + "scripts": [], + "own_tokenizer": false }, { "name": "Yao (P.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndonde Hamba", "iso_1_code": null, "iso_3_code": "njd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6120", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6119", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6076", + "scripts": [], + "own_tokenizer": false }, { "name": "R", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Herero (R.30)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Herero", "iso_1_code": "hz", "iso_3_code": "her", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6123", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6122", + "scripts": [], + "own_tokenizer": false }, { "name": "Herero (R.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dhimba", "iso_1_code": null, "iso_3_code": "dhm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6125", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6124", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbundu (R.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngendelengo", "iso_1_code": null, "iso_3_code": "nql", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6127", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuvale", "iso_1_code": null, "iso_3_code": "olu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6128", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6126", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbundu (R.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Umbundu", "iso_1_code": null, "iso_3_code": "umb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6130", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6129", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbundu (R.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndombe", "iso_1_code": null, "iso_3_code": "ndq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6132", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6131", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbundu (R.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyaneka", "iso_1_code": null, "iso_3_code": "nyk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6134", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwandu", "iso_1_code": null, "iso_3_code": "xdo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6135", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6133", + "scripts": [], + "own_tokenizer": false }, { "name": "Umbundu (R.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkumbi", "iso_1_code": null, "iso_3_code": "khu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6137", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6136", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambo (R.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oshiwambo", "iso_1_code": "kj", "iso_3_code": "kua", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6139", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6138", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambo (R.214)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbalanhu", "iso_1_code": null, "iso_3_code": "lnb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6141", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6140", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambo (R.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndonga", "iso_1_code": "ng", "iso_3_code": "ndo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6143", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6142", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambo (R.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwambi", "iso_1_code": null, "iso_3_code": "kwm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6145", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6144", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambo (R.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngandjera", "iso_1_code": null, "iso_3_code": "nne", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6147", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6146", + "scripts": [], + "own_tokenizer": false }, { "name": "Yeyi (R.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yeyi", "iso_1_code": null, "iso_3_code": "yey", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6149", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6121", + "scripts": [], + "own_tokenizer": false }, { "name": "S", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Copi (S.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chopi", "iso_1_code": null, "iso_3_code": "cce", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6152", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6151", + "scripts": [], + "own_tokenizer": false }, { "name": "Copi (S.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tonga", "iso_1_code": null, "iso_3_code": "toh", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6154", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6153", + "scripts": [], + "own_tokenizer": false }, { "name": "Nguni (S.407)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndebele", "iso_1_code": "nr", "iso_3_code": "nbl", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6156", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6155", + "scripts": [], + "own_tokenizer": false }, { "name": "Nguni (S.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Xhosa", "iso_1_code": "xh", "iso_3_code": "xho", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6158", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6157", + "scripts": [], + "own_tokenizer": false }, { "name": "Nguni (S.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zulu", "iso_1_code": "zu", "iso_3_code": "zul", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6160", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6159", + "scripts": [], + "own_tokenizer": false }, { "name": "Nguni (S.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Swati", "iso_1_code": "ss", "iso_3_code": "ssw", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6162", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6161", + "scripts": [], + "own_tokenizer": false }, { "name": "Nguni (S.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndebele", "iso_1_code": "nd", "iso_3_code": "nde", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6164", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6163", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona (S.10)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dema", "iso_1_code": null, "iso_3_code": "dmx", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6166", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona", "iso_1_code": "sn", "iso_3_code": "sna", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6167", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6165", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona (S.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tawara", "iso_1_code": null, "iso_3_code": "twl", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6168", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona (S.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manyika", "iso_1_code": null, "iso_3_code": "mxc", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6171", + "scripts": [], + "own_tokenizer": false }, { "name": "Tewe", "iso_1_code": null, "iso_3_code": "twx", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6172", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6170", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona (S.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndau", "iso_1_code": null, "iso_3_code": "ndc", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6174", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6173", + "scripts": [], + "own_tokenizer": false }, { "name": "Shona (S.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kalanga", "iso_1_code": null, "iso_3_code": "kck", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6176", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nambya", "iso_1_code": null, "iso_3_code": "nmq", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6177", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6175", + "scripts": [], + "own_tokenizer": false }, { "name": "Sotho-Tswana (S.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Setswana", "iso_1_code": "tn", "iso_3_code": "tsn", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "6179", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "6178", + "scripts": [], + "own_tokenizer": false }, { "name": "Sotho-Tswana (S.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kgalagadi", "iso_1_code": null, "iso_3_code": "xkv", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6181", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6180", + "scripts": [], + "own_tokenizer": false }, { "name": "Sotho-Tswana (S.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Birwa", "iso_1_code": null, "iso_3_code": "brl", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6183", + "scripts": [], + "own_tokenizer": false }, { "name": "Sotho, Northern", "iso_1_code": null, "iso_3_code": "nso", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6184", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tswapong", "iso_1_code": null, "iso_3_code": "two", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6185", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6182", + "scripts": [], + "own_tokenizer": false }, { "name": "Sotho-Tswana (S.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sotho, Southern", "iso_1_code": "st", "iso_3_code": "sot", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6187", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6186", + "scripts": [], + "own_tokenizer": false }, { "name": "Tswa-Rhonga (S.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tswa", "iso_1_code": null, "iso_3_code": "tsc", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6189", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6188", + "scripts": [], + "own_tokenizer": false }, { "name": "Tswa-Rhonga (S.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tsonga", "iso_1_code": "ts", "iso_3_code": "tso", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6191", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6190", + "scripts": [], + "own_tokenizer": false }, { "name": "Tswa-Rhonga (S.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ronga", "iso_1_code": null, "iso_3_code": "rng", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6193", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6192", + "scripts": [], + "own_tokenizer": false }, { "name": "Venda (S.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Venda", "iso_1_code": "ve", "iso_3_code": "ven", - "tokenizer": { - "name": "tswana", - "tokenizer": "SpaCyTokenizer(\"tn\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "6195", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "6194", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6150", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5532", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bafia (A.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hijuk", "iso_1_code": null, "iso_3_code": "hij", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6199", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6198", + "scripts": [], + "own_tokenizer": false }, { "name": "Bafia (A.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lefa", "iso_1_code": null, "iso_3_code": "lfa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6200", + "scripts": [], + "own_tokenizer": false }, { "name": "Bafia (A.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dimbong", "iso_1_code": null, "iso_3_code": "dii", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6203", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6202", + "scripts": [], + "own_tokenizer": false }, { "name": "Bafia (A.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bafia", "iso_1_code": null, "iso_3_code": "ksf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6205", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6204", + "scripts": [], + "own_tokenizer": false }, { "name": "Bafia (A.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tibea", "iso_1_code": null, "iso_3_code": "ngy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6207", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6206", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barombi", "iso_1_code": null, "iso_3_code": "bbi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6209", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6208", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bankon", "iso_1_code": null, "iso_3_code": "abb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6210", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Basaa", "iso_1_code": null, "iso_3_code": "bas", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6213", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bakoko", "iso_1_code": null, "iso_3_code": "bkh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6212", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tunen", "iso_1_code": null, "iso_3_code": "tvu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6216", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6215", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyokon", "iso_1_code": null, "iso_3_code": "nvo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6218", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6217", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.46)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nomaande", "iso_1_code": null, "iso_3_code": "lem", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6220", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6219", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.461)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tuotomb", "iso_1_code": null, "iso_3_code": "ttf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6222", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6221", + "scripts": [], + "own_tokenizer": false }, { "name": "Basaa (A.462)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yambeta", "iso_1_code": null, "iso_3_code": "yat", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6224", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6223", + "scripts": [], + "own_tokenizer": false }, { "name": "Bubi-Benga (A.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bubia", "iso_1_code": null, "iso_3_code": "bbx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6226", + "scripts": [], + "own_tokenizer": false }, { "name": "Bube", "iso_1_code": null, "iso_3_code": "bvb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6227", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6225", + "scripts": [], + "own_tokenizer": false }, { "name": "Bubi-Benga (A.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Batanga", "iso_1_code": null, "iso_3_code": "bnm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6229", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6228", + "scripts": [], + "own_tokenizer": false }, { "name": "Bubi-Benga (A.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kombe", "iso_1_code": null, "iso_3_code": "nui", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6231", + "scripts": [], + "own_tokenizer": false }, { "name": "Iyasa", "iso_1_code": null, "iso_3_code": "yko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6232", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6230", + "scripts": [], + "own_tokenizer": false }, { "name": "Bubi-Benga (A.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Benga", "iso_1_code": null, "iso_3_code": "bng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6234", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6233", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wumboko", "iso_1_code": null, "iso_3_code": "bqm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6235", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mokpwe", "iso_1_code": null, "iso_3_code": "bri", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6238", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6237", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Isubu", "iso_1_code": null, "iso_3_code": "szv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6239", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.231)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bakole", "iso_1_code": null, "iso_3_code": "kme", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6242", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6241", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duala", "iso_1_code": null, "iso_3_code": "dua", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6244", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6243", + "scripts": [], + "own_tokenizer": false }, { "name": "Duala (A.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mulimba", "iso_1_code": null, "iso_3_code": "mzd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6246", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6245", + "scripts": [], + "own_tokenizer": false }, { "name": "Ewondo-Fang (A.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eton", "iso_1_code": null, "iso_3_code": "eto", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6248", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mengisa", "iso_1_code": null, "iso_3_code": "mct", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6247", + "scripts": [], + "own_tokenizer": false }, { "name": "Ewondo-Fang (A.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ewondo", "iso_1_code": null, "iso_3_code": "ewo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6251", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6250", + "scripts": [], + "own_tokenizer": false }, { "name": "Ewondo-Fang (A.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bebele", "iso_1_code": null, "iso_3_code": "beb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6253", + "scripts": [], + "own_tokenizer": false }, { "name": "Bebil", "iso_1_code": null, "iso_3_code": "bxp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6252", + "scripts": [], + "own_tokenizer": false }, { "name": "Ewondo-Fang (A.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bulu", "iso_1_code": null, "iso_3_code": "bum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6256", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6255", + "scripts": [], + "own_tokenizer": false }, { "name": "Ewondo-Fang (A.75)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fang", "iso_1_code": null, "iso_3_code": "fan", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6258", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6257", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaka (A.91)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwakum", "iso_1_code": null, "iso_3_code": "kwu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6260", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6259", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaka (A.92)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pol", "iso_1_code": null, "iso_3_code": "pmm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6262", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6261", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaka (A.93)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kako", "iso_1_code": null, "iso_3_code": "kkj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6264", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6263", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oroko", "iso_1_code": null, "iso_3_code": "bdu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6266", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6265", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bafaw-Balong", "iso_1_code": null, "iso_3_code": "bwt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6268", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6267", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bonkeng", "iso_1_code": null, "iso_3_code": "bvg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6270", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6269", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbo", "iso_1_code": null, "iso_3_code": "mbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6272", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6271", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.151)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkongho", "iso_1_code": null, "iso_3_code": "nkc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6274", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6273", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.15B)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bassossi", "iso_1_code": null, "iso_3_code": "bsi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6275", + "scripts": [], + "own_tokenizer": false }, { "name": "Lundu-Balong (A.15C)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bakaka", "iso_1_code": null, "iso_3_code": "bqz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6278", + "scripts": [], + "own_tokenizer": false }, { "name": "Akoose", "iso_1_code": null, "iso_3_code": "bss", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6279", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6277", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.801)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gyele", "iso_1_code": null, "iso_3_code": "gyi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6280", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.802)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ukhwejo", "iso_1_code": null, "iso_3_code": "ukh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6283", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6282", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwasio", "iso_1_code": null, "iso_3_code": "nmg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6285", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6284", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Swo", "iso_1_code": null, "iso_3_code": "sox", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6287", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6286", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Makaa", "iso_1_code": null, "iso_3_code": "mcp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6289", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6288", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.831)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Byep", "iso_1_code": null, "iso_3_code": "mkk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6291", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6290", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.832)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kol", "iso_1_code": null, "iso_3_code": "biw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6293", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6292", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Njyem", "iso_1_code": null, "iso_3_code": "njy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6294", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.842)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koonzime", "iso_1_code": null, "iso_3_code": "ozm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6297", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6296", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bekwel", "iso_1_code": null, "iso_3_code": "bkw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6299", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6298", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.86)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mpiemo", "iso_1_code": null, "iso_3_code": "mcx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6301", + "scripts": [], + "own_tokenizer": false }, { "name": "Mpumpong", "iso_1_code": null, "iso_3_code": "mgg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6302", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6300", + "scripts": [], + "own_tokenizer": false }, { "name": "Makaa-Njem (A.87)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bomwali", "iso_1_code": null, "iso_3_code": "bmw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6304", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6303", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.601)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tuki", "iso_1_code": null, "iso_3_code": "bag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6306", + "scripts": [], + "own_tokenizer": false }, { "name": "Leti", "iso_1_code": null, "iso_3_code": "leo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6307", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6305", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nulibie", "iso_1_code": null, "iso_3_code": "ekm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6309", + "scripts": [], + "own_tokenizer": false }, { "name": "Numala", "iso_1_code": null, "iso_3_code": "mmu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6310", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangben", "iso_1_code": null, "iso_3_code": "yav", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6311", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6308", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.621)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nubaca", "iso_1_code": null, "iso_3_code": "baf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6313", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6312", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.622)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nugunu", "iso_1_code": null, "iso_3_code": "yas", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6315", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6314", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.623)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbule", "iso_1_code": null, "iso_3_code": "mlb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6317", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6316", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanaga (A.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bati", "iso_1_code": null, "iso_3_code": "btc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6319", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6318", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6197", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kele (B.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndasa", "iso_1_code": null, "iso_3_code": "nda", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6322", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6321", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.202)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sighu", "iso_1_code": null, "iso_3_code": "sxe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6324", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6323", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.203)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Samay", "iso_1_code": null, "iso_3_code": "syx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6326", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6325", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.204)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndambomo", "iso_1_code": null, "iso_3_code": "nxo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6328", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6327", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Seki", "iso_1_code": null, "iso_3_code": "syi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6330", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6329", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.211)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Molengue", "iso_1_code": null, "iso_3_code": "bxc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6332", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6331", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "K\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "keb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6334", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngom", "iso_1_code": null, "iso_3_code": "nra", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6335", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6333", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbangwe", "iso_1_code": null, "iso_3_code": "zmn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6337", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6336", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wumbvu", "iso_1_code": null, "iso_3_code": "wum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6339", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6338", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kota", "iso_1_code": null, "iso_3_code": "koq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6341", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6340", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sake", "iso_1_code": null, "iso_3_code": "sak", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6343", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6342", + "scripts": [], + "own_tokenizer": false }, { "name": "Kele (B.252)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mahongwe", "iso_1_code": null, "iso_3_code": "mhb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6345", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6344", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbete (B.602)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kaningi", "iso_1_code": null, "iso_3_code": "kzo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6347", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6346", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbete (B.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbere", "iso_1_code": null, "iso_3_code": "mdt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6349", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6348", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbete (B.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ombamba", "iso_1_code": null, "iso_3_code": "mbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6351", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6350", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbete (B.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndumu", "iso_1_code": null, "iso_3_code": "nmd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6353", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6352", + "scripts": [], + "own_tokenizer": false }, { "name": "Myene (B.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Myene", "iso_1_code": null, "iso_3_code": "mye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6355", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6354", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzebi (B.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wandji", "iso_1_code": null, "iso_3_code": "wdd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6357", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6356", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzebi (B.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duma", "iso_1_code": null, "iso_3_code": "dma", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6359", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6358", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzebi (B.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Njebi", "iso_1_code": null, "iso_3_code": "nzb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6361", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6360", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzebi (B.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tsaangi", "iso_1_code": null, "iso_3_code": "tsa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6363", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6362", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bwisi", "iso_1_code": null, "iso_3_code": "bwz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6365", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6364", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barama", "iso_1_code": null, "iso_3_code": "bbg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6367", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6366", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vumbu", "iso_1_code": null, "iso_3_code": "vum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6369", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6368", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sira", "iso_1_code": null, "iso_3_code": "swj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6371", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6370", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sangu", "iso_1_code": null, "iso_3_code": "snq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6373", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6372", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Punu", "iso_1_code": null, "iso_3_code": "puu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6375", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6374", + "scripts": [], + "own_tokenizer": false }, { "name": "Shira-Punu (B.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lumbu", "iso_1_code": null, "iso_3_code": "lup", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6377", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6376", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.701)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tchitchege", "iso_1_code": null, "iso_3_code": "tck", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6379", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6378", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Teke-Tege", "iso_1_code": null, "iso_3_code": "teg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6380", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngungwel", "iso_1_code": null, "iso_3_code": "ngz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6383", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6382", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yaka", "iso_1_code": null, "iso_3_code": "iyx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6385", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke-Laali", "iso_1_code": null, "iso_3_code": "lli", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6386", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke-Tsaayi", "iso_1_code": null, "iso_3_code": "tyi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6387", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke-Tyee", "iso_1_code": null, "iso_3_code": "tyx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6384", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Teke-Eboo", "iso_1_code": null, "iso_3_code": "ebo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6390", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke-Nzikou", "iso_1_code": null, "iso_3_code": "nzu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6391", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6389", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.75)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Teke, Ibali", "iso_1_code": null, "iso_3_code": "tek", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6393", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6392", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke (B.77)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Teke-Wuumu", "iso_1_code": null, "iso_3_code": "ifm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6395", + "scripts": [], + "own_tokenizer": false }, { "name": "Teke-Kukuya", "iso_1_code": null, "iso_3_code": "kkw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6394", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tiene", "iso_1_code": null, "iso_3_code": "tii", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6397", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boma", "iso_1_code": null, "iso_3_code": "boh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6400", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6399", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mfinu", "iso_1_code": null, "iso_3_code": "zmf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6402", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6401", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbuun", "iso_1_code": null, "iso_3_code": "zmp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6404", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6403", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Songo", "iso_1_code": null, "iso_3_code": "soo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6406", + "scripts": [], + "own_tokenizer": false }, { "name": "Iyansi", "iso_1_code": null, "iso_3_code": "yns", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6407", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6405", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.86)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ding", "iso_1_code": null, "iso_3_code": "diz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6409", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6408", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.861)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngwii", "iso_1_code": null, "iso_3_code": "nlo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6411", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6410", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.862)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lwel", "iso_1_code": null, "iso_3_code": "lvl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6412", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.864)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngongo", "iso_1_code": null, "iso_3_code": "noq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6415", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6414", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiene-Yanzi (B.865)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nzadi", "iso_1_code": null, "iso_3_code": "nzd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6417", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6416", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eviya", "iso_1_code": null, "iso_3_code": "gev", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6418", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Simba", "iso_1_code": null, "iso_3_code": "sbw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6421", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6420", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.304)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pinji", "iso_1_code": null, "iso_3_code": "pic", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6422", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.305)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bubi", "iso_1_code": null, "iso_3_code": "buw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6425", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6424", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tsogo", "iso_1_code": null, "iso_3_code": "tsv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6427", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6426", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsogo (B.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kande", "iso_1_code": null, "iso_3_code": "kbs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6429", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6428", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6320", + "scripts": [], + "own_tokenizer": false }, { "name": "C", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangi-Ntomba (C.30)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangala", "iso_1_code": null, "iso_3_code": "bxg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6432", + "scripts": [], + "own_tokenizer": false }, { "name": "Lingala", "iso_1_code": "ln", "iso_3_code": "lin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6433", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6431", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bolondo", "iso_1_code": null, "iso_3_code": "bzm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6434", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baloi", "iso_1_code": null, "iso_3_code": "biz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6437", + "scripts": [], + "own_tokenizer": false }, { "name": "Likila", "iso_1_code": null, "iso_3_code": "lie", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6438", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6436", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mabaale", "iso_1_code": null, "iso_3_code": "mmz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6439", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndobo", "iso_1_code": null, "iso_3_code": "ndw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6442", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6441", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangi", "iso_1_code": null, "iso_3_code": "bni", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6444", + "scripts": [], + "own_tokenizer": false }, { "name": "Moi", "iso_1_code": null, "iso_3_code": "mow", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6445", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6443", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.321)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Libinza", "iso_1_code": null, "iso_3_code": "liz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6447", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6446", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sengele", "iso_1_code": null, "iso_3_code": "szg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6449", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6448", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sakata", "iso_1_code": null, "iso_3_code": "skt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6451", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6450", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bolia", "iso_1_code": null, "iso_3_code": "bli", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6453", + "scripts": [], + "own_tokenizer": false }, { "name": "Ntomba", "iso_1_code": null, "iso_3_code": "nto", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6454", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6452", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.36)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boloki", "iso_1_code": null, "iso_3_code": "bkt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6456", + "scripts": [], + "own_tokenizer": false }, { "name": "Lusengo", "iso_1_code": null, "iso_3_code": "lse", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6457", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndolo", "iso_1_code": null, "iso_3_code": "ndl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6458", + "scripts": [], + "own_tokenizer": false }, { "name": "Yamongeri", "iso_1_code": null, "iso_3_code": "ymg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6459", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6455", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.37)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Budja", "iso_1_code": null, "iso_3_code": "bja", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6461", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6460", + "scripts": [], + "own_tokenizer": false }, { "name": "Bangi-Ntomba (C.371)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tembo", "iso_1_code": null, "iso_3_code": "tmv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6463", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6462", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushoong (C.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dengese", "iso_1_code": null, "iso_3_code": "dez", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6465", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6464", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushoong (C.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ohendo", "iso_1_code": null, "iso_3_code": "soe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6467", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6466", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushoong (C.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bushoong", "iso_1_code": null, "iso_3_code": "buf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6469", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6468", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushoong (C.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lele", "iso_1_code": null, "iso_3_code": "lel", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6471", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6470", + "scripts": [], + "own_tokenizer": false }, { "name": "Bushoong (C.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wongo", "iso_1_code": null, "iso_3_code": "won", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6472", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mboko", "iso_1_code": null, "iso_3_code": "mdu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6475", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6474", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akwa", "iso_1_code": null, "iso_3_code": "akw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6477", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6476", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koyo", "iso_1_code": null, "iso_3_code": "koh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6478", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbosi", "iso_1_code": null, "iso_3_code": "mdw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6481", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6480", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.26)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Likwala", "iso_1_code": null, "iso_3_code": "kwc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6483", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6482", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboshi (C.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Likuba", "iso_1_code": null, "iso_3_code": "kxx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6485", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6484", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongo-Nkundo (C.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mongo-Nkundu", "iso_1_code": null, "iso_3_code": "lol", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6487", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6486", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongo-Nkundo (C.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lalia", "iso_1_code": null, "iso_3_code": "lal", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6489", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6488", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongo-Nkundo (C.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngando", "iso_1_code": null, "iso_3_code": "nxd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6491", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6490", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pagibete", "iso_1_code": null, "iso_3_code": "pae", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6492", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kango", "iso_1_code": null, "iso_3_code": "kty", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6495", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6494", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngombe", "iso_1_code": null, "iso_3_code": "ngc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6497", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6496", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.411)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bomboma", "iso_1_code": null, "iso_3_code": "bws", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6499", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6498", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.412)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bamwe", "iso_1_code": null, "iso_3_code": "bmg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6501", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6500", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.413)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dzando", "iso_1_code": null, "iso_3_code": "dzn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6502", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.414)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ligenza", "iso_1_code": null, "iso_3_code": "lgz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6505", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6504", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bwela", "iso_1_code": null, "iso_3_code": "bwl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6507", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6506", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bwa", "iso_1_code": null, "iso_3_code": "bww", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6509", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6508", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.441)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Babango", "iso_1_code": null, "iso_3_code": "bbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6511", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6510", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombe (C.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngelima", "iso_1_code": null, "iso_3_code": "agh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6513", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6512", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dibole", "iso_1_code": null, "iso_3_code": "bvx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6515", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6514", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.102)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngando", "iso_1_code": null, "iso_3_code": "ngd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6516", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.104)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yaka", "iso_1_code": null, "iso_3_code": "axk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6519", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6518", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngundi", "iso_1_code": null, "iso_3_code": "ndn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6520", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pande", "iso_1_code": null, "iso_3_code": "bkj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6523", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6522", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbati", "iso_1_code": null, "iso_3_code": "mdn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6525", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6524", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bomitaba", "iso_1_code": null, "iso_3_code": "zmx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6527", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6526", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.143)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bonjo", "iso_1_code": null, "iso_3_code": "bok", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6529", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6528", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bongili", "iso_1_code": null, "iso_3_code": "bui", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6531", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6530", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boko", "iso_1_code": null, "iso_3_code": "bkp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6533", + "scripts": [], + "own_tokenizer": false }, { "name": "Lobala", "iso_1_code": null, "iso_3_code": "loq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6534", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6532", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.161)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bomboli", "iso_1_code": null, "iso_3_code": "bml", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6536", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6535", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngondi (C.162)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bozaba", "iso_1_code": null, "iso_3_code": "bzo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6538", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6537", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbesa", "iso_1_code": null, "iso_3_code": "zms", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6540", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6539", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "So", "iso_1_code": null, "iso_3_code": "soc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6542", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6541", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Poke", "iso_1_code": null, "iso_3_code": "pof", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6544", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6543", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lombo", "iso_1_code": null, "iso_3_code": "loo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6546", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6545", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kele", "iso_1_code": null, "iso_3_code": "khy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6548", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6547", + "scripts": [], + "own_tokenizer": false }, { "name": "Soko-Kele (C.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Foma", "iso_1_code": null, "iso_3_code": "fom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6550", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6549", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hamba", "iso_1_code": null, "iso_3_code": "hba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6552", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela", "iso_1_code": null, "iso_3_code": "tll", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6553", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6551", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kusu", "iso_1_code": null, "iso_3_code": "ksv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6555", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6554", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nkutu", "iso_1_code": null, "iso_3_code": "nkw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6557", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6556", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6558", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.75)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kela", "iso_1_code": null, "iso_3_code": "kel", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6560", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6559", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetela (C.76)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ombo", "iso_1_code": null, "iso_3_code": "oml", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6562", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6196", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5531", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndemli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndemli", "iso_1_code": null, "iso_3_code": "nml", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6564", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6563", + "scripts": [], + "own_tokenizer": false }, { "name": "Tikar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tikar", "iso_1_code": null, "iso_3_code": "tik", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6566", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6565", + "scripts": [], + "own_tokenizer": false }, { "name": "Tivoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abon", "iso_1_code": null, "iso_3_code": "abo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6568", + "scripts": [], + "own_tokenizer": false }, { "name": "Esimbi", "iso_1_code": null, "iso_3_code": "ags", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6569", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambo", "iso_1_code": null, "iso_3_code": "amb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6570", + "scripts": [], + "own_tokenizer": false }, { "name": "Ipulo", "iso_1_code": null, "iso_3_code": "ass", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6571", + "scripts": [], + "own_tokenizer": false }, { "name": "Iceve-Maci", "iso_1_code": null, "iso_3_code": "bec", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6572", + "scripts": [], + "own_tokenizer": false }, { "name": "Balo", "iso_1_code": null, "iso_3_code": "bqo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6573", + "scripts": [], + "own_tokenizer": false }, { "name": "Bitare", "iso_1_code": null, "iso_3_code": "brt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6574", + "scripts": [], + "own_tokenizer": false }, { "name": "Batu", "iso_1_code": null, "iso_3_code": "btu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6575", + "scripts": [], + "own_tokenizer": false }, { "name": "Evant", "iso_1_code": null, "iso_3_code": "bzz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6576", + "scripts": [], + "own_tokenizer": false }, { "name": "Caka", "iso_1_code": null, "iso_3_code": "ckx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6577", + "scripts": [], + "own_tokenizer": false }, { "name": "Eman", "iso_1_code": null, "iso_3_code": "emn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6578", + "scripts": [], + "own_tokenizer": false }, { "name": "Mesaka", "iso_1_code": null, "iso_3_code": "iyo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6579", + "scripts": [], + "own_tokenizer": false }, { "name": "Manta", "iso_1_code": null, "iso_3_code": "myg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6580", + "scripts": [], + "own_tokenizer": false }, { "name": "Osatu", "iso_1_code": null, "iso_3_code": "ost", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6581", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiv", "iso_1_code": null, "iso_3_code": "tiv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6582", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iyive", "iso_1_code": null, "iso_3_code": "uiv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6583", + "scripts": [], + "own_tokenizer": false }, { "name": "Itang", "iso_1_code": null, "iso_3_code": "uta", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6584", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6567", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Busuu", "iso_1_code": null, "iso_3_code": "bju", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6586", + "scripts": [], + "own_tokenizer": false }, { "name": "Bishuo", "iso_1_code": null, "iso_3_code": "bwh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6587", + "scripts": [], + "own_tokenizer": false }, { "name": "Bikya", "iso_1_code": null, "iso_3_code": "byb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6588", + "scripts": [], + "own_tokenizer": false }, { "name": "Moingi", "iso_1_code": null, "iso_3_code": "mwz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6589", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6585", + "scripts": [], + "own_tokenizer": false }, { "name": "Wide Grassfields", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Menchum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Befang", "iso_1_code": null, "iso_3_code": "bby", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6592", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6591", + "scripts": [], + "own_tokenizer": false }, { "name": "Narrow Grassfields", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fum", "iso_1_code": null, "iso_3_code": "fum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6594", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbam-Nkam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bamileke", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ghom\u00e1l\u00e1\u2019", "iso_1_code": null, "iso_3_code": "bbj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6597", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwa\u2019", "iso_1_code": null, "iso_3_code": "bko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6598", + "scripts": [], + "own_tokenizer": false }, { "name": "Fe\u2019fe\u2019", "iso_1_code": null, "iso_3_code": "fmp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6599", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngomba", "iso_1_code": null, "iso_3_code": "jgo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6600", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngombale", "iso_1_code": null, "iso_3_code": "nla", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6601", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngiemboon", "iso_1_code": null, "iso_3_code": "nnh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6602", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nda\u2019nda\u2019", "iso_1_code": null, "iso_3_code": "nnz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6603", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngwe", "iso_1_code": null, "iso_3_code": "nwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6604", + "scripts": [], + "own_tokenizer": false }, { "name": "Mengaka", "iso_1_code": null, "iso_3_code": "xmg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6605", + "scripts": [], + "own_tokenizer": false }, { "name": "Yemba", "iso_1_code": null, "iso_3_code": "ybb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6606", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6596", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngemba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awing", "iso_1_code": null, "iso_3_code": "azo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6608", + "scripts": [], + "own_tokenizer": false }, { "name": "Bambili-Bambui", "iso_1_code": null, "iso_3_code": "baw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6609", + "scripts": [], + "own_tokenizer": false }, { "name": "Bafut", "iso_1_code": null, "iso_3_code": "bfd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6610", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Beba", "iso_1_code": null, "iso_3_code": "bfp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6611", + "scripts": [], + "own_tokenizer": false }, { "name": "Mankong", "iso_1_code": null, "iso_3_code": "bqt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6612", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpati", "iso_1_code": null, "iso_3_code": "koc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6613", + "scripts": [], + "own_tokenizer": false }, { "name": "Mendankwe-Nkwen", "iso_1_code": null, "iso_3_code": "mfd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6614", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngemba", "iso_1_code": null, "iso_3_code": "nge", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6615", + "scripts": [], + "own_tokenizer": false }, { "name": "Pinyin", "iso_1_code": null, "iso_3_code": "pny", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6616", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6607", + "scripts": [], + "own_tokenizer": false }, { "name": "Nkambe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lidzonka", "iso_1_code": null, "iso_3_code": "add", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6618", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwaja", "iso_1_code": null, "iso_3_code": "kdz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6619", + "scripts": [], + "own_tokenizer": false }, { "name": "Limbum", "iso_1_code": null, "iso_3_code": "lmp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6620", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mbo\u2019", "iso_1_code": null, "iso_3_code": "mtk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6621", + "scripts": [], + "own_tokenizer": false }, { "name": "Mfumte", "iso_1_code": null, "iso_3_code": "nfu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6622", + "scripts": [], + "own_tokenizer": false }, { "name": "Yamba", "iso_1_code": null, "iso_3_code": "yam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6623", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6617", + "scripts": [], + "own_tokenizer": false }, { "name": "Nun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bamun", "iso_1_code": null, "iso_3_code": "bax", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6625", + "scripts": [], + "own_tokenizer": false }, { "name": "Chopechop", "iso_1_code": null, "iso_3_code": "bbq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6626", + "scripts": [], + "own_tokenizer": false }, { "name": "Supapya", "iso_1_code": null, "iso_3_code": "bbw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6627", + "scripts": [], + "own_tokenizer": false }, { "name": "Mengambo", "iso_1_code": null, "iso_3_code": "bce", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6628", + "scripts": [], + "own_tokenizer": false }, { "name": "Chufie\u2019", "iso_1_code": null, "iso_3_code": "bfj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6629", + "scripts": [], + "own_tokenizer": false }, { "name": "Mendenkye", "iso_1_code": null, "iso_3_code": "bgj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6630", + "scripts": [], + "own_tokenizer": false }, { "name": "Chrambo", "iso_1_code": null, "iso_3_code": "bmo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6631", + "scripts": [], + "own_tokenizer": false }, { "name": "Medumba", "iso_1_code": null, "iso_3_code": "byv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6632", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mungaka", "iso_1_code": null, "iso_3_code": "mhk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6633", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6624", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6595", + "scripts": [], + "own_tokenizer": false }, { "name": "Momo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Menka", "iso_1_code": null, "iso_3_code": "mea", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6635", + "scripts": [], + "own_tokenizer": false }, { "name": "Meta\u2019", "iso_1_code": null, "iso_3_code": "mgo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "6636", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mundani", "iso_1_code": null, "iso_3_code": "mnf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "6637", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngamambo", "iso_1_code": null, "iso_3_code": "nbv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6638", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngie", "iso_1_code": null, "iso_3_code": "ngj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6639", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngwo", "iso_1_code": null, "iso_3_code": "ngn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6640", + "scripts": [], + "own_tokenizer": false }, { "name": "Njen", "iso_1_code": null, "iso_3_code": "njj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6641", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngoshie", "iso_1_code": null, "iso_3_code": "nsh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6642", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6634", + "scripts": [], + "own_tokenizer": false }, { "name": "Ring", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Center", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Babanki", "iso_1_code": null, "iso_3_code": "bbk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6645", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mmen", "iso_1_code": null, "iso_3_code": "bfm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6646", + "scripts": [], + "own_tokenizer": false }, { "name": "Kom", "iso_1_code": null, "iso_3_code": "bkm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6647", + "scripts": [], + "own_tokenizer": false }, { "name": "Bum", "iso_1_code": null, "iso_3_code": "bmv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6648", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mbessa", "iso_1_code": null, "iso_3_code": "emz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6649", + "scripts": [], + "own_tokenizer": false }, { "name": "Kung", "iso_1_code": null, "iso_3_code": "kfl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6650", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuk", "iso_1_code": null, "iso_3_code": "kfn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6651", + "scripts": [], + "own_tokenizer": false }, { "name": "Oku", "iso_1_code": null, "iso_3_code": "oku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6652", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6644", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lamnso\u02bc", "iso_1_code": null, "iso_3_code": "lns", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6654", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6653", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Vengo", "iso_1_code": null, "iso_3_code": "bav", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6656", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wushi", "iso_1_code": null, "iso_3_code": "bse", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6657", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngiemekohke", "iso_1_code": null, "iso_3_code": "bvm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6658", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenswei Nsei", "iso_1_code": null, "iso_3_code": "ndb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6659", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6655", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aghem", "iso_1_code": null, "iso_3_code": "agq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6661", + "scripts": [], + "own_tokenizer": false }, { "name": "Isu", "iso_1_code": null, "iso_3_code": "isu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6662", + "scripts": [], + "own_tokenizer": false }, { "name": "Laimbue", "iso_1_code": null, "iso_3_code": "lmx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6663", + "scripts": [], + "own_tokenizer": false }, { "name": "Weh", "iso_1_code": null, "iso_3_code": "weh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6664", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhoa", "iso_1_code": null, "iso_3_code": "zhw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "6665", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6660", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6643", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nde-Gbite", "iso_1_code": null, "iso_3_code": "ned", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6667", + "scripts": [], + "own_tokenizer": false }, { "name": "Viti", "iso_1_code": null, "iso_3_code": "vit", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6666", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6593", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Momo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ambele", "iso_1_code": null, "iso_3_code": "ael", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6670", + "scripts": [], + "own_tokenizer": false }, { "name": "Atong", "iso_1_code": null, "iso_3_code": "ato", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6671", + "scripts": [], + "own_tokenizer": false }, { "name": "Busam", "iso_1_code": null, "iso_3_code": "bxs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6672", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6669", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6590", + "scripts": [], + "own_tokenizer": false }, { "name": "Yemne-Kimbi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mundabli", "iso_1_code": null, "iso_3_code": "boe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6674", + "scripts": [], + "own_tokenizer": false }, { "name": "Fang", "iso_1_code": null, "iso_3_code": "fak", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6675", + "scripts": [], + "own_tokenizer": false }, { "name": "Koshin", "iso_1_code": null, "iso_3_code": "kid", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6676", + "scripts": [], + "own_tokenizer": false }, { "name": "Mungbam", "iso_1_code": null, "iso_3_code": "mij", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6677", + "scripts": [], + "own_tokenizer": false }, { "name": "Ajumbu", "iso_1_code": null, "iso_3_code": "muc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6678", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6673", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5485", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "\u00c1nc\u00e1", "iso_1_code": null, "iso_3_code": "acb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6680", + "scripts": [], + "own_tokenizer": false }, { "name": "Buru", "iso_1_code": null, "iso_3_code": "bqw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6681", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6679", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5452", + "scripts": [], + "own_tokenizer": false }, { "name": "Cross River", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bendi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Utugwang-Irungene-Afrike", "iso_1_code": null, "iso_3_code": "afe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6684", + "scripts": [], + "own_tokenizer": false }, { "name": "Elege", "iso_1_code": null, "iso_3_code": "alf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6685", + "scripts": [], + "own_tokenizer": false }, { "name": "Bekwarra", "iso_1_code": null, "iso_3_code": "bkv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6686", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bokyi", "iso_1_code": null, "iso_3_code": "bky", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6687", + "scripts": [], + "own_tokenizer": false }, { "name": "Bete-Bendi", "iso_1_code": null, "iso_3_code": "btt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6688", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bumaji", "iso_1_code": null, "iso_3_code": "byp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6689", + "scripts": [], + "own_tokenizer": false }, { "name": "Abanglekuo", "iso_1_code": null, "iso_3_code": "bzy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6690", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubang", "iso_1_code": null, "iso_3_code": "uba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6691", + "scripts": [], + "own_tokenizer": false }, { "name": "Bukpe", "iso_1_code": null, "iso_3_code": "ukp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6692", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6683", + "scripts": [], + "own_tokenizer": false }, { "name": "Delta Cross", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Delta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abureni", "iso_1_code": null, "iso_3_code": "mgj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6695", + "scripts": [], + "own_tokenizer": false }, { "name": "Obulom", "iso_1_code": null, "iso_3_code": "obu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6696", + "scripts": [], + "own_tokenizer": false }, { "name": "Ogbia", "iso_1_code": null, "iso_3_code": "ogb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6697", + "scripts": [], + "own_tokenizer": false }, { "name": "Ogbogolo", "iso_1_code": null, "iso_3_code": "ogg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6698", + "scripts": [], + "own_tokenizer": false }, { "name": "Ogbronuagum", "iso_1_code": null, "iso_3_code": "ogu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6699", + "scripts": [], + "own_tokenizer": false }, { "name": "O\u2019chi\u2019chi\u2019", "iso_1_code": null, "iso_3_code": "xoc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6700", + "scripts": [], + "own_tokenizer": false }, { "name": "Abua-Odual", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abua", "iso_1_code": null, "iso_3_code": "abn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6702", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Odual", "iso_1_code": null, "iso_3_code": "odu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6703", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6701", + "scripts": [], + "own_tokenizer": false }, { "name": "Kugbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kugbo", "iso_1_code": null, "iso_3_code": "kes", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6705", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6704", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6694", + "scripts": [], + "own_tokenizer": false }, { "name": "Lower Cross", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Obolo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Obolo", "iso_1_code": null, "iso_3_code": "ann", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6708", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Eki", "iso_1_code": null, "iso_3_code": "eki", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6709", + "scripts": [], + "own_tokenizer": false }, { "name": "Idere", "iso_1_code": null, "iso_3_code": "ide", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6710", + "scripts": [], + "own_tokenizer": false }, { "name": "Ebughu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ebughu", "iso_1_code": null, "iso_3_code": "ebg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6712", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6711", + "scripts": [], + "own_tokenizer": false }, { "name": "Efai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Efai", "iso_1_code": null, "iso_3_code": "efa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6714", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6713", + "scripts": [], + "own_tokenizer": false }, { "name": "Efik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Anaang", "iso_1_code": null, "iso_3_code": "anw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6716", + "scripts": [], + "own_tokenizer": false }, { "name": "Efik", "iso_1_code": null, "iso_3_code": "efi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "6717", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ibibio", "iso_1_code": null, "iso_3_code": "ibb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6718", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukwa", "iso_1_code": null, "iso_3_code": "ukq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6719", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6715", + "scripts": [], + "own_tokenizer": false }, { "name": "Ekit", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ekit", "iso_1_code": null, "iso_3_code": "eke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6721", + "scripts": [], + "own_tokenizer": false }, { "name": "Etebi", "iso_1_code": null, "iso_3_code": "etb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6722", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6720", + "scripts": [], + "own_tokenizer": false }, { "name": "Enwang-Uda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Enwan", "iso_1_code": null, "iso_3_code": "enw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6724", + "scripts": [], + "own_tokenizer": false }, { "name": "Uda", "iso_1_code": null, "iso_3_code": "uda", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6725", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6723", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibino", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ibino", "iso_1_code": null, "iso_3_code": "ibn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6727", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6726", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibuoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ibuoro", "iso_1_code": null, "iso_3_code": "ibr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6729", + "scripts": [], + "own_tokenizer": false }, { "name": "Itu Mbon Uzo", "iso_1_code": null, "iso_3_code": "itm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6730", + "scripts": [], + "own_tokenizer": false }, { "name": "Ito", "iso_1_code": null, "iso_3_code": "itw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6731", + "scripts": [], + "own_tokenizer": false }, { "name": "Nkari", "iso_1_code": null, "iso_3_code": "nkz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6732", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6728", + "scripts": [], + "own_tokenizer": false }, { "name": "Iko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Iko", "iso_1_code": null, "iso_3_code": "iki", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6734", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6733", + "scripts": [], + "own_tokenizer": false }, { "name": "Ilue", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ilue", "iso_1_code": null, "iso_3_code": "ilv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6736", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6735", + "scripts": [], + "own_tokenizer": false }, { "name": "Okobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Okobo", "iso_1_code": null, "iso_3_code": "okb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6738", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6737", + "scripts": [], + "own_tokenizer": false }, { "name": "Oro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oro", "iso_1_code": null, "iso_3_code": "orx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6740", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6739", + "scripts": [], + "own_tokenizer": false }, { "name": "Usaghade", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Usaghade", "iso_1_code": null, "iso_3_code": "usk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6742", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6741", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6707", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6706", + "scripts": [], + "own_tokenizer": false }, { "name": "Ogoni", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gokana", "iso_1_code": null, "iso_3_code": "gkn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6745", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Khana", "iso_1_code": null, "iso_3_code": "ogo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6746", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "T\u00e8\u00e8\u0323 \u0323", "iso_1_code": null, "iso_3_code": "tkq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6747", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6744", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baan", "iso_1_code": null, "iso_3_code": "bvj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6749", + "scripts": [], + "own_tokenizer": false }, { "name": "Eleme", "iso_1_code": null, "iso_3_code": "elm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6750", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6748", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6743", + "scripts": [], + "own_tokenizer": false }, { "name": "Upper Cross", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Agoi-Doko-Iyoniyong", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Iyongiyong", "iso_1_code": null, "iso_3_code": "bbs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6753", + "scripts": [], + "own_tokenizer": false }, { "name": "Robambami", "iso_1_code": null, "iso_3_code": "ibm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6754", + "scripts": [], + "own_tokenizer": false }, { "name": "Deko-Dusanga", "iso_1_code": null, "iso_3_code": "uya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6755", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6752", + "scripts": [], + "own_tokenizer": false }, { "name": "Akpet", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ukpet-Ehom", "iso_1_code": null, "iso_3_code": "akd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6757", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6756", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "East-West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ikom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Olulumo-Ikom", "iso_1_code": null, "iso_3_code": "iko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6761", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6760", + "scripts": [], + "own_tokenizer": false }, { "name": "Loko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lubila", "iso_1_code": null, "iso_3_code": "kcc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6763", + "scripts": [], + "own_tokenizer": false }, { "name": "Nkukoli", "iso_1_code": null, "iso_3_code": "nbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6764", + "scripts": [], + "own_tokenizer": false }, { "name": "Lokaa", "iso_1_code": null, "iso_3_code": "yaz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6765", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6762", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbembe-Legbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Legbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Legbo", "iso_1_code": null, "iso_3_code": "agb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6768", + "scripts": [], + "own_tokenizer": false }, { "name": "Leyigha", "iso_1_code": null, "iso_3_code": "ayi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6769", + "scripts": [], + "own_tokenizer": false }, { "name": "Lenyima", "iso_1_code": null, "iso_3_code": "ldg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6770", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6767", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbembe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbembe, Cross River", "iso_1_code": null, "iso_3_code": "mfn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6772", + "scripts": [], + "own_tokenizer": false } - ] - } - ] + ], + "node_i": "6771", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6766", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6759", + "scripts": [], + "own_tokenizer": false }, { "name": "North-South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koring-Kukele", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koring", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oring", "iso_1_code": null, "iso_3_code": "org", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6776", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6775", + "scripts": [], + "own_tokenizer": false }, { "name": "Kukele", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Uzekwe", "iso_1_code": null, "iso_3_code": "eze", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6778", + "scripts": [], + "own_tokenizer": false }, { "name": "Kukele", "iso_1_code": null, "iso_3_code": "kez", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6779", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6777", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6774", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubaghara-Kohumono", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kohumono", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hohumono", "iso_1_code": null, "iso_3_code": "bcs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6782", + "scripts": [], + "own_tokenizer": false }, { "name": "Umon", "iso_1_code": null, "iso_3_code": "umm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6783", + "scripts": [], + "own_tokenizer": false }, { "name": "Agwagwune", "iso_1_code": null, "iso_3_code": "yay", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6784", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6781", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubaghara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ubaghara", "iso_1_code": null, "iso_3_code": "byc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6786", + "scripts": [], + "own_tokenizer": false } - ] - } - ] + ], + "node_i": "6785", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "6780", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6773", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6758", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiong-Korop", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kiong", "iso_1_code": null, "iso_3_code": "kkm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6788", + "scripts": [], + "own_tokenizer": false }, { "name": "Durop", "iso_1_code": null, "iso_3_code": "krp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6789", + "scripts": [], + "own_tokenizer": false }, { "name": "Odut", "iso_1_code": null, "iso_3_code": "oda", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6790", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6787", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6751", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6693", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6682", + "scripts": [], + "own_tokenizer": false }, { "name": "Defoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Akokoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arigidi", "iso_1_code": null, "iso_3_code": "aqg", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6793", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6792", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayere-Ahan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "\u00c0h\u00e0n", "iso_1_code": null, "iso_3_code": "ahn", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6795", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayere", "iso_1_code": null, "iso_3_code": "aye", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6796", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6794", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoruboid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Edekiri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ede Cabe", "iso_1_code": null, "iso_3_code": "cbj", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6799", + "scripts": [], + "own_tokenizer": false }, { "name": "Ede Ica", "iso_1_code": null, "iso_3_code": "ica", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6800", + "scripts": [], + "own_tokenizer": false }, { "name": "Ede Idaca", "iso_1_code": null, "iso_3_code": "idd", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6801", + "scripts": [], + "own_tokenizer": false }, { "name": "If\u00e8", "iso_1_code": null, "iso_3_code": "ife", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6802", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ede Ije", "iso_1_code": null, "iso_3_code": "ijj", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6803", + "scripts": [], + "own_tokenizer": false }, { "name": "Isekiri", "iso_1_code": null, "iso_3_code": "its", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6804", + "scripts": [], + "own_tokenizer": false }, { "name": "Lucumi", "iso_1_code": null, "iso_3_code": "luq", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6805", + "scripts": [], + "own_tokenizer": false }, { "name": "Mokole", "iso_1_code": null, "iso_3_code": "mkl", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6806", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nago, Southern", "iso_1_code": null, "iso_3_code": "nqg", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6807", + "scripts": [], + "own_tokenizer": false }, { "name": "Ede Nago, Kura", "iso_1_code": null, "iso_3_code": "nqk", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6808", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulukwumi", "iso_1_code": null, "iso_3_code": "ulb", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6809", + "scripts": [], + "own_tokenizer": false }, { "name": "Nago, Northern", "iso_1_code": null, "iso_3_code": "xkb", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6810", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoruba", "iso_1_code": "yo", "iso_3_code": "yor", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "6811", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "6798", + "scripts": [], + "own_tokenizer": false }, { "name": "Igala", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Igala", "iso_1_code": null, "iso_3_code": "igl", - "tokenizer": { - "name": "yoruba", - "tokenizer": "SpaCyTokenizer(\"yo\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6813", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6812", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6797", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6791", + "scripts": [], + "own_tokenizer": false }, { "name": "Edoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Delta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Degema", "iso_1_code": null, "iso_3_code": "deg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6816", + "scripts": [], + "own_tokenizer": false }, { "name": "Engenni", "iso_1_code": null, "iso_3_code": "enn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6817", + "scripts": [], + "own_tokenizer": false }, { "name": "Epie", "iso_1_code": null, "iso_3_code": "epi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6818", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6815", + "scripts": [], + "own_tokenizer": false }, { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ihievbe", "iso_1_code": null, "iso_3_code": "ihi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6820", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikhin-Aokho", "iso_1_code": null, "iso_3_code": "ikh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6821", + "scripts": [], + "own_tokenizer": false }, { "name": "Edo-Esan-Ora", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Edo", "iso_1_code": null, "iso_3_code": "bin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6823", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Emai-Iuleha-Ora", "iso_1_code": null, "iso_3_code": "ema", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6824", + "scripts": [], + "own_tokenizer": false }, { "name": "Esan", "iso_1_code": null, "iso_3_code": "ish", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6825", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6822", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghotuo-Uneme-Yekhee", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ghotuo", "iso_1_code": null, "iso_3_code": "aaa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6827", + "scripts": [], + "own_tokenizer": false }, { "name": "Ivbie North-Okpela-Arhe", "iso_1_code": null, "iso_3_code": "atg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6828", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Enwan", "iso_1_code": null, "iso_3_code": "env", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6829", + "scripts": [], + "own_tokenizer": false }, { "name": "Etsako", "iso_1_code": null, "iso_3_code": "ets", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6830", + "scripts": [], + "own_tokenizer": false }, { "name": "Igwe", "iso_1_code": null, "iso_3_code": "igw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6831", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikpeshi", "iso_1_code": null, "iso_3_code": "ikp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6832", + "scripts": [], + "own_tokenizer": false }, { "name": "Ososo", "iso_1_code": null, "iso_3_code": "oso", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6833", + "scripts": [], + "own_tokenizer": false }, { "name": "Sasaru", "iso_1_code": null, "iso_3_code": "sxs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6834", + "scripts": [], + "own_tokenizer": false }, { "name": "Uneme", "iso_1_code": null, "iso_3_code": "une", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6835", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6826", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6819", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aduge", "iso_1_code": null, "iso_3_code": "adu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6837", + "scripts": [], + "own_tokenizer": false }, { "name": "Osse", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ehueun", "iso_1_code": null, "iso_3_code": "ehu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6839", + "scripts": [], + "own_tokenizer": false }, { "name": "Iyayu", "iso_1_code": null, "iso_3_code": "iya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6840", + "scripts": [], + "own_tokenizer": false }, { "name": "Uhami", "iso_1_code": null, "iso_3_code": "uha", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6841", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukue", "iso_1_code": null, "iso_3_code": "uku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6842", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6838", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akuku", "iso_1_code": null, "iso_3_code": "ayk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6844", + "scripts": [], + "own_tokenizer": false }, { "name": "Idesa", "iso_1_code": null, "iso_3_code": "ids", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6845", + "scripts": [], + "own_tokenizer": false }, { "name": "Okpe", "iso_1_code": null, "iso_3_code": "okx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6846", + "scripts": [], + "own_tokenizer": false }, { "name": "Oloma", "iso_1_code": null, "iso_3_code": "olm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6847", + "scripts": [], + "own_tokenizer": false }, { "name": "Okpamheri", "iso_1_code": null, "iso_3_code": "opa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6848", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6843", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6836", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eruwa", "iso_1_code": null, "iso_3_code": "erh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6850", + "scripts": [], + "own_tokenizer": false }, { "name": "Uvbie", "iso_1_code": null, "iso_3_code": "evh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6851", + "scripts": [], + "own_tokenizer": false }, { "name": "Isoko", "iso_1_code": null, "iso_3_code": "iso", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6852", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Okpe", "iso_1_code": null, "iso_3_code": "oke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6853", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Urhobo", "iso_1_code": null, "iso_3_code": "urh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6854", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6849", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6814", + "scripts": [], + "own_tokenizer": false }, { "name": "Idomoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akweya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eloyi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ajiri", "iso_1_code": null, "iso_3_code": "afo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6858", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6857", + "scripts": [], + "own_tokenizer": false }, { "name": "Etulo-Idoma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Etulo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Etulo", "iso_1_code": null, "iso_3_code": "utr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6861", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6860", + "scripts": [], + "own_tokenizer": false }, { "name": "Idoma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Agatu", "iso_1_code": null, "iso_3_code": "agc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6863", + "scripts": [], + "own_tokenizer": false }, { "name": "Alago", "iso_1_code": null, "iso_3_code": "ala", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6864", + "scripts": [], + "own_tokenizer": false }, { "name": "Idoma", "iso_1_code": null, "iso_3_code": "idu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6865", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Igede", "iso_1_code": null, "iso_3_code": "ige", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6866", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yala", "iso_1_code": null, "iso_3_code": "yba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6867", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6862", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6859", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6856", + "scripts": [], + "own_tokenizer": false }, { "name": "Yatye-Akpa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akweya", "iso_1_code": null, "iso_3_code": "akf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6869", + "scripts": [], + "own_tokenizer": false }, { "name": "Yace", "iso_1_code": null, "iso_3_code": "ekr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6870", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6868", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6855", + "scripts": [], + "own_tokenizer": false }, { "name": "Igboid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ekpeye", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ekpeye", "iso_1_code": null, "iso_3_code": "ekp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6873", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6872", + "scripts": [], + "own_tokenizer": false }, { "name": "Igbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ezaa", "iso_1_code": null, "iso_3_code": "eza", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6875", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mgbolizhia", "iso_1_code": null, "iso_3_code": "gmz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6876", + "scripts": [], + "own_tokenizer": false }, { "name": "Igbo", "iso_1_code": "ig", "iso_3_code": "ibo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6877", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ika", "iso_1_code": null, "iso_3_code": "ikk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6878", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ikwere", "iso_1_code": null, "iso_3_code": "ikw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6879", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ikwo", "iso_1_code": null, "iso_3_code": "iqw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6880", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Izii", "iso_1_code": null, "iso_3_code": "izz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6881", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ogbah", "iso_1_code": null, "iso_3_code": "ogc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6882", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukwuani-Aboh-Ndoni", "iso_1_code": null, "iso_3_code": "ukw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6883", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6874", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6871", + "scripts": [], + "own_tokenizer": false }, { "name": "Jukunoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bete", "iso_1_code": null, "iso_3_code": "byf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6885", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jukun-Mbembe-Wurbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jukun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jukun Takum", "iso_1_code": null, "iso_3_code": "jbu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6889", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jibu", "iso_1_code": null, "iso_3_code": "jib", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6890", + "scripts": [], + "own_tokenizer": false }, { "name": "H\u00f5ne", "iso_1_code": null, "iso_3_code": "juh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6891", + "scripts": [], + "own_tokenizer": false }, { "name": "W\u00e3pha", "iso_1_code": null, "iso_3_code": "juw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6892", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6888", + "scripts": [], + "own_tokenizer": false }, { "name": "Kororofa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wannu", "iso_1_code": null, "iso_3_code": "jub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6894", + "scripts": [], + "own_tokenizer": false }, { "name": "Wapan", "iso_1_code": null, "iso_3_code": "juk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6895", + "scripts": [], + "own_tokenizer": false }, { "name": "Jiba", "iso_1_code": null, "iso_3_code": "juo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6896", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6893", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbembe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbembe, Tigon", "iso_1_code": null, "iso_3_code": "nza", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6898", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6897", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shoo-Minda-Nye", "iso_1_code": null, "iso_3_code": "bcv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6900", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6899", + "scripts": [], + "own_tokenizer": false }, { "name": "Wurbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karimjo", "iso_1_code": null, "iso_3_code": "cfg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6902", + "scripts": [], + "own_tokenizer": false }, { "name": "Jiru", "iso_1_code": null, "iso_3_code": "jrr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6903", + "scripts": [], + "own_tokenizer": false }, { "name": "Tita", "iso_1_code": null, "iso_3_code": "tdq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6904", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6901", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6887", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpan-Icen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Etkywan", "iso_1_code": null, "iso_3_code": "ich", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6906", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpan", "iso_1_code": null, "iso_3_code": "kpk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6907", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6905", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6886", + "scripts": [], + "own_tokenizer": false }, { "name": "Yukuben-Kuteb", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akum", "iso_1_code": null, "iso_3_code": "aku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6909", + "scripts": [], + "own_tokenizer": false }, { "name": "Beezen", "iso_1_code": null, "iso_3_code": "bnz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6910", + "scripts": [], + "own_tokenizer": false }, { "name": "Kapya", "iso_1_code": null, "iso_3_code": "klo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6911", + "scripts": [], + "own_tokenizer": false }, { "name": "Kutep", "iso_1_code": null, "iso_3_code": "kub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6912", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yukuben", "iso_1_code": null, "iso_3_code": "ybl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6913", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6908", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6884", + "scripts": [], + "own_tokenizer": false }, { "name": "Kainji", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Amo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Amo", "iso_1_code": null, "iso_3_code": "amo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6917", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6916", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Jos", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Panawa", "iso_1_code": null, "iso_3_code": "pwb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6919", + "scripts": [], + "own_tokenizer": false }, { "name": "Jera", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gamo-Ningi", "iso_1_code": null, "iso_3_code": "bte", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6921", + "scripts": [], + "own_tokenizer": false }, { "name": "Izora", "iso_1_code": null, "iso_3_code": "cbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6922", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunzuii", "iso_1_code": null, "iso_3_code": "dza", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6923", + "scripts": [], + "own_tokenizer": false }, { "name": "Lere", "iso_1_code": null, "iso_3_code": "gnh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6924", + "scripts": [], + "own_tokenizer": false }, { "name": "Gyem", "iso_1_code": null, "iso_3_code": "gye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6925", + "scripts": [], + "own_tokenizer": false }, { "name": "Jere", "iso_1_code": null, "iso_3_code": "jer", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6926", + "scripts": [], + "own_tokenizer": false }, { "name": "Janji", "iso_1_code": null, "iso_3_code": "jni", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6927", + "scripts": [], + "own_tokenizer": false }, { "name": "Kudu-Camo", "iso_1_code": null, "iso_3_code": "kov", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6928", + "scripts": [], + "own_tokenizer": false }, { "name": "Lemoro", "iso_1_code": null, "iso_3_code": "ldj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6929", + "scripts": [], + "own_tokenizer": false }, { "name": "Iguta", "iso_1_code": null, "iso_3_code": "nar", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6930", + "scripts": [], + "own_tokenizer": false }, { "name": "Sheni", "iso_1_code": null, "iso_3_code": "scv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6931", + "scripts": [], + "own_tokenizer": false }, { "name": "Shau", "iso_1_code": null, "iso_3_code": "sqh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6932", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanga", "iso_1_code": null, "iso_3_code": "xsn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6933", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6920", + "scripts": [], + "own_tokenizer": false }, { "name": "Kauru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bina", "iso_1_code": null, "iso_3_code": "byj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6935", + "scripts": [], + "own_tokenizer": false }, { "name": "Dungu", "iso_1_code": null, "iso_3_code": "dbv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6936", + "scripts": [], + "own_tokenizer": false }, { "name": "Tugbiri-Niragu", "iso_1_code": null, "iso_3_code": "grh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6937", + "scripts": [], + "own_tokenizer": false }, { "name": "Kizamani", "iso_1_code": null, "iso_3_code": "izm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6938", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaivi", "iso_1_code": null, "iso_3_code": "kce", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6939", + "scripts": [], + "own_tokenizer": false }, { "name": "Vono", "iso_1_code": null, "iso_3_code": "kch", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6940", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinuku", "iso_1_code": null, "iso_3_code": "kkd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6941", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumi", "iso_1_code": null, "iso_3_code": "kku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6942", + "scripts": [], + "own_tokenizer": false }, { "name": "Kono", "iso_1_code": null, "iso_3_code": "klk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6943", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurama", "iso_1_code": null, "iso_3_code": "krh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6944", + "scripts": [], + "own_tokenizer": false }, { "name": "Rishiwa", "iso_1_code": null, "iso_3_code": "rsw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6945", + "scripts": [], + "own_tokenizer": false }, { "name": "Mala", "iso_1_code": null, "iso_3_code": "ruy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6946", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruma", "iso_1_code": null, "iso_3_code": "ruz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6947", + "scripts": [], + "own_tokenizer": false }, { "name": "Vori", "iso_1_code": null, "iso_3_code": "sde", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6948", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6934", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6918", + "scripts": [], + "own_tokenizer": false }, { "name": "Piti-Atsam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Atsam", "iso_1_code": null, "iso_3_code": "cch", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6950", + "scripts": [], + "own_tokenizer": false }, { "name": "Abishi", "iso_1_code": null, "iso_3_code": "pcn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6951", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6949", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6915", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Basa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Basa-Gumna", "iso_1_code": null, "iso_3_code": "bsl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6954", + "scripts": [], + "own_tokenizer": false }, { "name": "Bassa-Kontagora", "iso_1_code": null, "iso_3_code": "bsr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6955", + "scripts": [], + "own_tokenizer": false }, { "name": "Basa-Gurmana", "iso_1_code": null, "iso_3_code": "buj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6956", + "scripts": [], + "own_tokenizer": false }, { "name": "Basa", "iso_1_code": null, "iso_3_code": "bzw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6957", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6953", + "scripts": [], + "own_tokenizer": false }, { "name": "Baushi-Gurmana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bauchi", "iso_1_code": null, "iso_3_code": "bsf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6959", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurmana", "iso_1_code": null, "iso_3_code": "gvm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6958", + "scripts": [], + "own_tokenizer": false }, { "name": "Duka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gwamhi-Wuri", "iso_1_code": null, "iso_3_code": "bga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6962", + "scripts": [], + "own_tokenizer": false }, { "name": "Damakawa", "iso_1_code": null, "iso_3_code": "dam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6963", + "scripts": [], + "own_tokenizer": false }, { "name": "C\u2019Lela", "iso_1_code": null, "iso_3_code": "dri", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6964", + "scripts": [], + "own_tokenizer": false }, { "name": "ut-Ma\u2019in", "iso_1_code": null, "iso_3_code": "gel", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6965", + "scripts": [], + "own_tokenizer": false }, { "name": "us-Saare", "iso_1_code": null, "iso_3_code": "uss", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6966", + "scripts": [], + "own_tokenizer": false }, { "name": "ut-Hun", "iso_1_code": null, "iso_3_code": "uth", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6967", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "6961", + "scripts": [], + "own_tokenizer": false }, { "name": "Kainji Lake", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tsucuba", "iso_1_code": null, "iso_3_code": "cbq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6969", + "scripts": [], + "own_tokenizer": false }, { "name": "Laru", "iso_1_code": null, "iso_3_code": "lan", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6970", + "scripts": [], + "own_tokenizer": false }, { "name": "Lopa", "iso_1_code": null, "iso_3_code": "lop", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6971", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6968", + "scripts": [], + "own_tokenizer": false }, { "name": "Kambari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cishingini", "iso_1_code": null, "iso_3_code": "asg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6973", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Baangi", "iso_1_code": null, "iso_3_code": "bqx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6974", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsikimba", "iso_1_code": null, "iso_3_code": "kdl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6975", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tsishingini", "iso_1_code": null, "iso_3_code": "tsw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6976", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tsuvadi", "iso_1_code": null, "iso_3_code": "tvd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6977", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6972", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamuku", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Acipa, Eastern", "iso_1_code": null, "iso_3_code": "acp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6979", + "scripts": [], + "own_tokenizer": false }, { "name": "Cicipu", "iso_1_code": null, "iso_3_code": "awc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6980", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamuku", "iso_1_code": null, "iso_3_code": "cdr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6981", + "scripts": [], + "own_tokenizer": false }, { "name": "Cahungwarya", "iso_1_code": null, "iso_3_code": "nat", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6982", + "scripts": [], + "own_tokenizer": false }, { "name": "Pangu", "iso_1_code": null, "iso_3_code": "png", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6983", + "scripts": [], + "own_tokenizer": false }, { "name": "Rogo", "iso_1_code": null, "iso_3_code": "rod", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6984", + "scripts": [], + "own_tokenizer": false }, { "name": "Shama-Sambuga", "iso_1_code": null, "iso_3_code": "sqa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6985", + "scripts": [], + "own_tokenizer": false }, { "name": "Fungwa", "iso_1_code": null, "iso_3_code": "ula", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6986", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6978", + "scripts": [], + "own_tokenizer": false }, { "name": "Reshe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Reshe", "iso_1_code": null, "iso_3_code": "res", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6988", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6987", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6952", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6914", + "scripts": [], + "own_tokenizer": false }, { "name": "Nupoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ebira-Gade", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gade", "iso_1_code": null, "iso_3_code": "ged", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6991", + "scripts": [], + "own_tokenizer": false }, { "name": "Ebira", "iso_1_code": null, "iso_3_code": "igb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6990", + "scripts": [], + "own_tokenizer": false }, { "name": "Nupe-Gbagyi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dibo", "iso_1_code": null, "iso_3_code": "dio", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6994", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbagyi-Gbari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gbagyi", "iso_1_code": null, "iso_3_code": "gbr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "6996", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gbari", "iso_1_code": null, "iso_3_code": "gby", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6997", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6995", + "scripts": [], + "own_tokenizer": false }, { "name": "Nupe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Asu", "iso_1_code": null, "iso_3_code": "aum", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "6999", + "scripts": [], + "own_tokenizer": false }, { "name": "Gupa-Abawa", "iso_1_code": null, "iso_3_code": "gpa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7000", + "scripts": [], + "own_tokenizer": false }, { "name": "Kakanda", "iso_1_code": null, "iso_3_code": "kka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7001", + "scripts": [], + "own_tokenizer": false }, { "name": "Kami", "iso_1_code": null, "iso_3_code": "kmi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7002", + "scripts": [], + "own_tokenizer": false }, { "name": "Kupa", "iso_1_code": null, "iso_3_code": "kug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7003", + "scripts": [], + "own_tokenizer": false }, { "name": "Nupe-Nupe-Tako", "iso_1_code": null, "iso_3_code": "nup", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7004", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6998", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6993", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "6989", + "scripts": [], + "own_tokenizer": false }, { "name": "Oko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oko-Eni-Osayen", "iso_1_code": null, "iso_3_code": "oks", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7006", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7005", + "scripts": [], + "own_tokenizer": false }, { "name": "Plateau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Atoro", "iso_1_code": null, "iso_3_code": "tdv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7008", + "scripts": [], + "own_tokenizer": false }, { "name": "Alumic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Arum", "iso_1_code": null, "iso_3_code": "aab", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7010", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7009", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ayu", "iso_1_code": null, "iso_3_code": "ayu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7012", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7011", + "scripts": [], + "own_tokenizer": false }, { "name": "Beromic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Berom", "iso_1_code": null, "iso_3_code": "bom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7014", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Iten", "iso_1_code": null, "iso_3_code": "etx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7015", + "scripts": [], + "own_tokenizer": false }, { "name": "Shall-Zwall", "iso_1_code": null, "iso_3_code": "sha", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7016", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7013", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cara", "iso_1_code": null, "iso_3_code": "cfd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7019", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7018", + "scripts": [], + "own_tokenizer": false }, { "name": "South-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ibaas", "iso_1_code": null, "iso_3_code": "cen", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7021", + "scripts": [], + "own_tokenizer": false }, { "name": "Firan", "iso_1_code": null, "iso_3_code": "fir", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7022", + "scripts": [], + "own_tokenizer": false }, { "name": "Ganang", "iso_1_code": null, "iso_3_code": "gne", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7023", + "scripts": [], + "own_tokenizer": false }, { "name": "Rigwe", "iso_1_code": null, "iso_3_code": "iri", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7024", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Izere", "iso_1_code": null, "iso_3_code": "izr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7025", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jju", "iso_1_code": null, "iso_3_code": "kaj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7026", + "scripts": [], + "own_tokenizer": false }, { "name": "Tyap", "iso_1_code": null, "iso_3_code": "kcg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7027", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7020", + "scripts": [], + "own_tokenizer": false }, { "name": "West-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ahwai", "iso_1_code": null, "iso_3_code": "nfd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7029", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7028", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7017", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ejuele", "iso_1_code": null, "iso_3_code": "dbi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7031", + "scripts": [], + "own_tokenizer": false }, { "name": "Ajiya", "iso_1_code": null, "iso_3_code": "idc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7032", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikulu", "iso_1_code": null, "iso_3_code": "ikl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7033", + "scripts": [], + "own_tokenizer": false }, { "name": "Iku-Gora-Ankwa", "iso_1_code": null, "iso_3_code": "ikv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7034", + "scripts": [], + "own_tokenizer": false }, { "name": "Adara", "iso_1_code": null, "iso_3_code": "kad", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7035", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuturmi", "iso_1_code": null, "iso_3_code": "khj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7036", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7030", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Horom", "iso_1_code": null, "iso_3_code": "hoe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7038", + "scripts": [], + "own_tokenizer": false }, { "name": "Bo-Rukul", "iso_1_code": null, "iso_3_code": "mae", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7039", + "scripts": [], + "own_tokenizer": false }, { "name": "Pyam", "iso_1_code": null, "iso_3_code": "pym", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7040", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7037", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Migili", "iso_1_code": null, "iso_3_code": "mgi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7042", + "scripts": [], + "own_tokenizer": false }, { "name": "Rjili", "iso_1_code": null, "iso_3_code": "uji", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7043", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro Nulu", "iso_1_code": null, "iso_3_code": "vkn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7044", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro Zuba", "iso_1_code": null, "iso_3_code": "vkz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7045", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7041", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarokoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Yangkam", "iso_1_code": null, "iso_3_code": "bsx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7047", + "scripts": [], + "own_tokenizer": false }, { "name": "Pye", "iso_1_code": null, "iso_3_code": "pai", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7048", + "scripts": [], + "own_tokenizer": false }, { "name": "Kusur-Myet", "iso_1_code": null, "iso_3_code": "tdl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7049", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarok", "iso_1_code": null, "iso_3_code": "yer", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7050", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7046", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Hyamic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kyoli", "iso_1_code": null, "iso_3_code": "cry", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7054", + "scripts": [], + "own_tokenizer": false }, { "name": "Hyam", "iso_1_code": null, "iso_3_code": "jab", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7055", + "scripts": [], + "own_tokenizer": false }, { "name": "Gyong", "iso_1_code": null, "iso_3_code": "kdm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7056", + "scripts": [], + "own_tokenizer": false }, { "name": "Shamang", "iso_1_code": null, "iso_3_code": "xsh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7057", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhire", "iso_1_code": null, "iso_3_code": "zhi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7058", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7053", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ashe", "iso_1_code": null, "iso_3_code": "ahs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7060", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro Wachi", "iso_1_code": null, "iso_3_code": "bqv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7061", + "scripts": [], + "own_tokenizer": false }, { "name": "Duya", "iso_1_code": null, "iso_3_code": "ldb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7062", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyankpa", "iso_1_code": null, "iso_3_code": "yes", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7063", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7059", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7052", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aninka", "iso_1_code": null, "iso_3_code": "aqk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7066", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadung", "iso_1_code": null, "iso_3_code": "dkg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7067", + "scripts": [], + "own_tokenizer": false }, { "name": "Bijim", "iso_1_code": null, "iso_3_code": "jbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7068", + "scripts": [], + "own_tokenizer": false }, { "name": "Bu", "iso_1_code": null, "iso_3_code": "jid", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7069", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamantan", "iso_1_code": null, "iso_3_code": "kci", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7070", + "scripts": [], + "own_tokenizer": false }, { "name": "Nikyob-Nindem", "iso_1_code": null, "iso_3_code": "kdp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7071", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kanufi", "iso_1_code": null, "iso_3_code": "kni", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7072", + "scripts": [], + "own_tokenizer": false }, { "name": "Mada", "iso_1_code": null, "iso_3_code": "mda", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7073", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Numana", "iso_1_code": null, "iso_3_code": "nbr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7074", + "scripts": [], + "own_tokenizer": false }, { "name": "Ninzo", "iso_1_code": null, "iso_3_code": "nin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7075", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nungu", "iso_1_code": null, "iso_3_code": "rin", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7076", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuce", "iso_1_code": null, "iso_3_code": "ruk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7077", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiyaa", "iso_1_code": null, "iso_3_code": "tyy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7078", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7065", + "scripts": [], + "own_tokenizer": false }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akye", "iso_1_code": null, "iso_3_code": "aik", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7080", + "scripts": [], + "own_tokenizer": false }, { "name": "Eggon", "iso_1_code": null, "iso_3_code": "ego", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7081", + "scripts": [], + "own_tokenizer": false }, { "name": "Sambe", "iso_1_code": null, "iso_3_code": "xab", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7082", + "scripts": [], + "own_tokenizer": false }, { "name": "Hasha", "iso_1_code": null, "iso_3_code": "ybj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7083", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7064", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7051", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7007", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukaan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ukaan", "iso_1_code": null, "iso_3_code": "kcf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7085", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7084", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fali of Baissa", "iso_1_code": null, "iso_3_code": "fah", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7087", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7086", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5449", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dogon, Ampari", "iso_1_code": null, "iso_3_code": "aqd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7089", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Dogul Dom", "iso_1_code": null, "iso_3_code": "dbg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7090", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Ben Tey", "iso_1_code": null, "iso_3_code": "dbt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7091", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Bondum Dom", "iso_1_code": null, "iso_3_code": "dbu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7092", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Bankan Tey", "iso_1_code": null, "iso_3_code": "dbw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7093", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Donno So", "iso_1_code": null, "iso_3_code": "dds", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7094", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Bunoge", "iso_1_code": null, "iso_3_code": "dgb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7095", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Jamsay", "iso_1_code": null, "iso_3_code": "djm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7096", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Mombo", "iso_1_code": null, "iso_3_code": "dmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7097", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Ana Tinga", "iso_1_code": null, "iso_3_code": "dti", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7098", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Tene Kan", "iso_1_code": null, "iso_3_code": "dtk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7099", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Tomo Kan", "iso_1_code": null, "iso_3_code": "dtm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7100", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Tommo So", "iso_1_code": null, "iso_3_code": "dto", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7101", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Toro So", "iso_1_code": null, "iso_3_code": "dts", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7102", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dogon, Toro Tegu", "iso_1_code": null, "iso_3_code": "dtt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7103", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Tebul Ure", "iso_1_code": null, "iso_3_code": "dtu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7104", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Yanda Dom", "iso_1_code": null, "iso_3_code": "dym", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7105", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Nanga Dama", "iso_1_code": null, "iso_3_code": "nzz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7106", + "scripts": [], + "own_tokenizer": false }, { "name": "Dogon, Tiranige Diga", "iso_1_code": null, "iso_3_code": "tde", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7107", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7088", + "scripts": [], + "own_tokenizer": false }, { "name": "Kru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aizi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aizi, Tiagbamrin", "iso_1_code": null, "iso_3_code": "ahi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7110", + "scripts": [], + "own_tokenizer": false }, { "name": "Aizi, Mobumrin", "iso_1_code": null, "iso_3_code": "ahm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7111", + "scripts": [], + "own_tokenizer": false }, { "name": "Aizi, Aproumu", "iso_1_code": null, "iso_3_code": "ahp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7112", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7109", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bakwe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bakw\u00e9", "iso_1_code": null, "iso_3_code": "bjw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7115", + "scripts": [], + "own_tokenizer": false }, { "name": "Wan\u00e9", "iso_1_code": null, "iso_3_code": "hwa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7116", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7114", + "scripts": [], + "own_tokenizer": false }, { "name": "Bete", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "B\u00e9t\u00e9, Gagnoa", "iso_1_code": null, "iso_3_code": "btg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7119", + "scripts": [], + "own_tokenizer": false }, { "name": "Kouya", "iso_1_code": null, "iso_3_code": "kyf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7120", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7118", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "B\u00e9t\u00e9, Guiberoua", "iso_1_code": null, "iso_3_code": "bet", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7122", + "scripts": [], + "own_tokenizer": false }, { "name": "B\u00e9t\u00e9, Daloa", "iso_1_code": null, "iso_3_code": "bev", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7123", + "scripts": [], + "own_tokenizer": false }, { "name": "Godi\u00e9", "iso_1_code": null, "iso_3_code": "god", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7124", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7121", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7117", + "scripts": [], + "own_tokenizer": false }, { "name": "Dida", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dida, Lakota", "iso_1_code": null, "iso_3_code": "dic", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7126", + "scripts": [], + "own_tokenizer": false }, { "name": "Gu\u00e9bie", "iso_1_code": null, "iso_3_code": "gie", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7127", + "scripts": [], + "own_tokenizer": false }, { "name": "Dida, Yocobou\u00e9", "iso_1_code": null, "iso_3_code": "gud", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7128", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Neyo", "iso_1_code": null, "iso_3_code": "ney", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7129", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7125", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwadia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kodia", "iso_1_code": null, "iso_3_code": "kwp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7131", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7130", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7113", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuwaa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuwaa", "iso_1_code": null, "iso_3_code": "blh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7133", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7132", + "scripts": [], + "own_tokenizer": false }, { "name": "Seme", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Siamou", "iso_1_code": null, "iso_3_code": "sif", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7135", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7134", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bassa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bassa", "iso_1_code": null, "iso_3_code": "bsq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7138", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dewoin", "iso_1_code": null, "iso_3_code": "dee", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7139", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbii", "iso_1_code": null, "iso_3_code": "ggb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7140", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7137", + "scripts": [], + "own_tokenizer": false }, { "name": "Grebo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Glio-Oubi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Glio-Oubi", "iso_1_code": null, "iso_3_code": "oub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7143", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7142", + "scripts": [], + "own_tokenizer": false }, { "name": "Ivorian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Krumen, Plapo", "iso_1_code": null, "iso_3_code": "ktj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7145", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Krumen, Pye", "iso_1_code": null, "iso_3_code": "pye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7146", + "scripts": [], + "own_tokenizer": false }, { "name": "Krumen, Tepo", "iso_1_code": null, "iso_3_code": "ted", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7147", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7144", + "scripts": [], + "own_tokenizer": false }, { "name": "Liberian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Grebo, Northern", "iso_1_code": null, "iso_3_code": "gbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7149", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Grebo, Gboloo", "iso_1_code": null, "iso_3_code": "gec", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7150", + "scripts": [], + "own_tokenizer": false }, { "name": "Grebo, Southern", "iso_1_code": null, "iso_3_code": "grj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7151", + "scripts": [], + "own_tokenizer": false }, { "name": "Grebo, Central", "iso_1_code": null, "iso_3_code": "grv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7152", + "scripts": [], + "own_tokenizer": false }, { "name": "Grebo, Barclayville", "iso_1_code": null, "iso_3_code": "gry", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7153", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7141", + "scripts": [], + "own_tokenizer": false }, { "name": "Klao", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Klao", "iso_1_code": null, "iso_3_code": "klu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7155", + "scripts": [], + "own_tokenizer": false }, { "name": "Tajuasohn", "iso_1_code": null, "iso_3_code": "tja", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7156", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7154", + "scripts": [], + "own_tokenizer": false }, { "name": "Wee", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Guere-Krahn", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Daho-Doo", "iso_1_code": null, "iso_3_code": "das", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7159", + "scripts": [], + "own_tokenizer": false }, { "name": "Glaro-Twabo", "iso_1_code": null, "iso_3_code": "glr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7160", + "scripts": [], + "own_tokenizer": false }, { "name": "W\u00e8 Southern", "iso_1_code": null, "iso_3_code": "gxx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7161", + "scripts": [], + "own_tokenizer": false }, { "name": "Sapo", "iso_1_code": null, "iso_3_code": "krn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7162", + "scripts": [], + "own_tokenizer": false }, { "name": "Krahn, Western", "iso_1_code": null, "iso_3_code": "krw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7163", + "scripts": [], + "own_tokenizer": false }, { "name": "W\u00e8 Western", "iso_1_code": null, "iso_3_code": "wec", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7164", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7158", + "scripts": [], + "own_tokenizer": false }, { "name": "Konobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Krahn, Eastern", "iso_1_code": null, "iso_3_code": "kqo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7166", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7165", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyabwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nyabwa", "iso_1_code": null, "iso_3_code": "nwb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7168", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7167", + "scripts": [], + "own_tokenizer": false }, { "name": "Wobe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "W\u00e8 Northern", "iso_1_code": null, "iso_3_code": "wob", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7170", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7157", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7136", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7108", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Esuma", "iso_1_code": null, "iso_3_code": "esm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7172", + "scripts": [], + "own_tokenizer": false }, { "name": "Boro", "iso_1_code": null, "iso_3_code": "xxb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7173", + "scripts": [], + "own_tokenizer": false }, { "name": "Left Bank", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Avatime-Nyangbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Avatime", "iso_1_code": null, "iso_3_code": "avn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7176", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nyagbo", "iso_1_code": null, "iso_3_code": "nyb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7177", + "scripts": [], + "own_tokenizer": false }, { "name": "Tafi", "iso_1_code": null, "iso_3_code": "tcd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7178", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7175", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aguna", "iso_1_code": null, "iso_3_code": "aug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7180", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Ci", "iso_1_code": null, "iso_3_code": "cib", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7181", + "scripts": [], + "own_tokenizer": false }, { "name": "\u00c9w\u00e9", "iso_1_code": "ee", "iso_3_code": "ewe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7182", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gbe, Gbesi", "iso_1_code": null, "iso_3_code": "gbs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7183", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Eastern Xwla", "iso_1_code": null, "iso_3_code": "gbx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7184", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpessi", "iso_1_code": null, "iso_3_code": "kef", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7185", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Kotafon", "iso_1_code": null, "iso_3_code": "kqk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7186", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Saxwe", "iso_1_code": null, "iso_3_code": "sxw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7187", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Waci", "iso_1_code": null, "iso_3_code": "wci", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7188", + "scripts": [], + "own_tokenizer": false }, { "name": "Wudu", "iso_1_code": null, "iso_3_code": "wud", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7189", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Xwela", "iso_1_code": null, "iso_3_code": "xwe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7190", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Western Xwla", "iso_1_code": null, "iso_3_code": "xwl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7191", + "scripts": [], + "own_tokenizer": false }, { "name": "Aja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Aja", "iso_1_code": null, "iso_3_code": "ajg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7193", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gbe, Ayizo", "iso_1_code": null, "iso_3_code": "ayb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7194", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Defi", "iso_1_code": null, "iso_3_code": "gbh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7195", + "scripts": [], + "own_tokenizer": false }, { "name": "Gun", "iso_1_code": null, "iso_3_code": "guw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7196", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gbe, Tofin", "iso_1_code": null, "iso_3_code": "tfi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7197", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbe, Weme", "iso_1_code": null, "iso_3_code": "wem", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7192", + "scripts": [], + "own_tokenizer": false }, { "name": "Fon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fon", "iso_1_code": null, "iso_3_code": "fon", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7200", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gbe, Maxi", "iso_1_code": null, "iso_3_code": "mxl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7199", + "scripts": [], + "own_tokenizer": false }, { "name": "Mina", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gen", "iso_1_code": null, "iso_3_code": "gej", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7203", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7202", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7179", + "scripts": [], + "own_tokenizer": false }, { "name": "Kebu-Animere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Animere", "iso_1_code": null, "iso_3_code": "anf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7205", + "scripts": [], + "own_tokenizer": false }, { "name": "Akebu", "iso_1_code": null, "iso_3_code": "keu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7206", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7204", + "scripts": [], + "own_tokenizer": false }, { "name": "Kposo-Ahlo-Bowili", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Adangbe", "iso_1_code": null, "iso_3_code": "adq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7208", + "scripts": [], + "own_tokenizer": false }, { "name": "Igo", "iso_1_code": null, "iso_3_code": "ahl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7209", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuwuli", "iso_1_code": null, "iso_3_code": "bov", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7210", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ikposo", "iso_1_code": null, "iso_3_code": "kpo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7207", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7174", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Agneby", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ab\u00e9", "iso_1_code": null, "iso_3_code": "aba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7214", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Abidji", "iso_1_code": null, "iso_3_code": "abi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7215", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Adioukrou", "iso_1_code": null, "iso_3_code": "adj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7216", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7213", + "scripts": [], + "own_tokenizer": false }, { "name": "Attie", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Atti\u00e9", "iso_1_code": null, "iso_3_code": "ati", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7218", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7217", + "scripts": [], + "own_tokenizer": false }, { "name": "Avikam-Alladian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Alladian", "iso_1_code": null, "iso_3_code": "ald", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7220", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Avikam", "iso_1_code": null, "iso_3_code": "avi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7221", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7219", + "scripts": [], + "own_tokenizer": false }, { "name": "Ga-Dangme", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dangme", "iso_1_code": null, "iso_3_code": "ada", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7223", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ga", "iso_1_code": null, "iso_3_code": "gaa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7224", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7222", + "scripts": [], + "own_tokenizer": false }, { "name": "Potou-Tano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Basila-Adele", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Adele", "iso_1_code": null, "iso_3_code": "ade", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7227", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Anii", "iso_1_code": null, "iso_3_code": "blo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7228", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7226", + "scripts": [], + "own_tokenizer": false }, { "name": "Ega", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ega", "iso_1_code": null, "iso_3_code": "ega", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7229", + "scripts": [], + "own_tokenizer": false }, { "name": "Lelemi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lelemi-Akpafu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Siwu", "iso_1_code": null, "iso_3_code": "akp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7233", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lelemi", "iso_1_code": null, "iso_3_code": "lef", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7234", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7232", + "scripts": [], + "own_tokenizer": false }, { "name": "Likpe-Santrokofi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Sekpele", "iso_1_code": null, "iso_3_code": "lip", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7236", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Selee", "iso_1_code": null, "iso_3_code": "snw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7237", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7235", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7231", + "scripts": [], + "own_tokenizer": false }, { "name": "Logba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Logba", "iso_1_code": null, "iso_3_code": "lgq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7239", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7238", + "scripts": [], + "own_tokenizer": false }, { "name": "Potou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tchaman", "iso_1_code": null, "iso_3_code": "ebr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7241", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbato", "iso_1_code": null, "iso_3_code": "gwa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7242", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7240", + "scripts": [], + "own_tokenizer": false }, { "name": "Tano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abron", "iso_1_code": null, "iso_3_code": "abr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7246", + "scripts": [], + "own_tokenizer": false }, { "name": "Akan", "iso_1_code": "ak", "iso_3_code": "aka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7247", + "scripts": [], + "own_tokenizer": false }, { "name": "Wasa", "iso_1_code": null, "iso_3_code": "wss", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7248", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7245", + "scripts": [], + "own_tokenizer": false }, { "name": "Bia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Anyin", "iso_1_code": null, "iso_3_code": "any", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7251", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Baoul\u00e9", "iso_1_code": null, "iso_3_code": "bci", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7252", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Anufo", "iso_1_code": null, "iso_3_code": "cko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7253", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Anyin Morofo", "iso_1_code": null, "iso_3_code": "mtb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7254", + "scripts": [], + "own_tokenizer": false }, { "name": "Esahie", "iso_1_code": null, "iso_3_code": "sfw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7255", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7250", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ahanta", "iso_1_code": null, "iso_3_code": "aha", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7257", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jwira-Pepesa", "iso_1_code": null, "iso_3_code": "jwi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7258", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzema", "iso_1_code": null, "iso_3_code": "nzi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "7259", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "7256", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7249", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7244", + "scripts": [], + "own_tokenizer": false }, { "name": "Guang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "North Guang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gikyode", "iso_1_code": null, "iso_3_code": "acd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7262", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ginyanga", "iso_1_code": null, "iso_3_code": "ayg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7263", + "scripts": [], + "own_tokenizer": false }, { "name": "Tchumbuli", "iso_1_code": null, "iso_3_code": "bqa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7264", + "scripts": [], + "own_tokenizer": false }, { "name": "Dompo", "iso_1_code": null, "iso_3_code": "doy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7265", + "scripts": [], + "own_tokenizer": false }, { "name": "Foodo", "iso_1_code": null, "iso_3_code": "fod", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7266", + "scripts": [], + "own_tokenizer": false }, { "name": "Gonja", "iso_1_code": null, "iso_3_code": "gjn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7267", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kplang", "iso_1_code": null, "iso_3_code": "kph", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7268", + "scripts": [], + "own_tokenizer": false }, { "name": "Krache", "iso_1_code": null, "iso_3_code": "kye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7269", + "scripts": [], + "own_tokenizer": false }, { "name": "Nawuri", "iso_1_code": null, "iso_3_code": "naw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7270", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chumburung", "iso_1_code": null, "iso_3_code": "ncu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7271", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nkonya", "iso_1_code": null, "iso_3_code": "nko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7272", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nkami", "iso_1_code": null, "iso_3_code": "nkq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7273", + "scripts": [], + "own_tokenizer": false }, { "name": "Nchumbulu", "iso_1_code": null, "iso_3_code": "nlu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7274", + "scripts": [], + "own_tokenizer": false }, { "name": "Dwang", "iso_1_code": null, "iso_3_code": "nnu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7275", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7261", + "scripts": [], + "own_tokenizer": false }, { "name": "South Guang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awutu", "iso_1_code": null, "iso_3_code": "afu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7277", + "scripts": [], + "own_tokenizer": false }, { "name": "Cherepon", "iso_1_code": null, "iso_3_code": "cpn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7278", + "scripts": [], + "own_tokenizer": false }, { "name": "Gua", "iso_1_code": null, "iso_3_code": "gwx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7279", + "scripts": [], + "own_tokenizer": false }, { "name": "Larteh", "iso_1_code": null, "iso_3_code": "lar", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7280", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7260", + "scripts": [], + "own_tokenizer": false }, { "name": "Krobu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Krobu", "iso_1_code": null, "iso_3_code": "kxb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7282", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7281", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Abure", "iso_1_code": null, "iso_3_code": "abu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7284", + "scripts": [], + "own_tokenizer": false }, { "name": "Beti", "iso_1_code": null, "iso_3_code": "eot", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7285", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7283", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7243", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7225", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7212", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7171", + "scripts": [], + "own_tokenizer": false }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Adamawa-Ubangi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Adamawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fali", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Fali, South", "iso_1_code": null, "iso_3_code": "fal", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7290", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fali, North", "iso_1_code": null, "iso_3_code": "fll", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7291", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7289", + "scripts": [], + "own_tokenizer": false }, { "name": "Kam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kam", "iso_1_code": null, "iso_3_code": "kdx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7293", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7292", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kwa", "iso_1_code": null, "iso_3_code": "kwb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7294", + "scripts": [], + "own_tokenizer": false }, { "name": "La\u2019bi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "La\u2019bi", "iso_1_code": null, "iso_3_code": "lbi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7297", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7296", + "scripts": [], + "own_tokenizer": false }, { "name": "Leko-Nimbari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dii", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duupa", "iso_1_code": null, "iso_3_code": "dae", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7301", + "scripts": [], + "own_tokenizer": false }, { "name": "Dii", "iso_1_code": null, "iso_3_code": "dur", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7302", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dugun", "iso_1_code": null, "iso_3_code": "ndu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7303", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7300", + "scripts": [], + "own_tokenizer": false }, { "name": "Duli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Duli-Gey", "iso_1_code": null, "iso_3_code": "duz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7305", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7304", + "scripts": [], + "own_tokenizer": false }, { "name": "Voko-Dowayo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kutin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Pere", "iso_1_code": null, "iso_3_code": "pfe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7308", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7307", + "scripts": [], + "own_tokenizer": false }, { "name": "Vere-Dowayo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dowayo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Doyayo", "iso_1_code": null, "iso_3_code": "dow", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7311", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7310", + "scripts": [], + "own_tokenizer": false }, { "name": "Vere-Gimme", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gimme", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gimnime", "iso_1_code": null, "iso_3_code": "gmn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7314", + "scripts": [], + "own_tokenizer": false }, { "name": "Gimme", "iso_1_code": null, "iso_3_code": "kmp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7315", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7313", + "scripts": [], + "own_tokenizer": false }, { "name": "Vere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koma", "iso_1_code": null, "iso_3_code": "kmy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7317", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Verre", "iso_1_code": null, "iso_3_code": "ver", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7318", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7316", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7312", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7309", + "scripts": [], + "own_tokenizer": false }, { "name": "Voko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Longto", "iso_1_code": null, "iso_3_code": "wok", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "7320", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7319", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7299", + "scripts": [], + "own_tokenizer": false }, { "name": "Leko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kolbila", "iso_1_code": null, "iso_3_code": "klc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7322", + "scripts": [], + "own_tokenizer": false }, { "name": "Mubako", "iso_1_code": null, "iso_3_code": "muo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7323", + "scripts": [], + "own_tokenizer": false }, { "name": "Samba Leko", "iso_1_code": null, "iso_3_code": "ndi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7324", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wom", "iso_1_code": null, "iso_3_code": "wom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7325", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7321", + "scripts": [], + "own_tokenizer": false }, { "name": "Mumuye-Yandang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mumuye", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gengle", "iso_1_code": null, "iso_3_code": "geg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7328", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumba", "iso_1_code": null, "iso_3_code": "ksm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7329", + "scripts": [], + "own_tokenizer": false }, { "name": "Mumuye", "iso_1_code": null, "iso_3_code": "mzm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7330", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pangseng", "iso_1_code": null, "iso_3_code": "pgs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7331", + "scripts": [], + "own_tokenizer": false }, { "name": "Rang", "iso_1_code": null, "iso_3_code": "rax", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7332", + "scripts": [], + "own_tokenizer": false }, { "name": "Teme", "iso_1_code": null, "iso_3_code": "tdo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7333", + "scripts": [], + "own_tokenizer": false }, { "name": "Waka", "iso_1_code": null, "iso_3_code": "wav", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7334", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7327", + "scripts": [], + "own_tokenizer": false }, { "name": "Yandang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "bcn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7336", + "scripts": [], + "own_tokenizer": false }, { "name": "Kugama", "iso_1_code": null, "iso_3_code": "kow", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7337", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpasham", "iso_1_code": null, "iso_3_code": "pbn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7338", + "scripts": [], + "own_tokenizer": false }, { "name": "Yendang", "iso_1_code": null, "iso_3_code": "ynq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7339", + "scripts": [], + "own_tokenizer": false }, { "name": "Yotti", "iso_1_code": null, "iso_3_code": "yot", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7340", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7335", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7326", + "scripts": [], + "own_tokenizer": false }, { "name": "Nimbari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nimbari", "iso_1_code": null, "iso_3_code": "nmr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7342", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7341", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7298", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbum-Day", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bua", "iso_1_code": null, "iso_3_code": "bub", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7345", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolgo", "iso_1_code": null, "iso_3_code": "bvo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7346", + "scripts": [], + "own_tokenizer": false }, { "name": "Fania", "iso_1_code": null, "iso_3_code": "fni", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7347", + "scripts": [], + "own_tokenizer": false }, { "name": "Bon Gula", "iso_1_code": null, "iso_3_code": "glc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7348", + "scripts": [], + "own_tokenizer": false }, { "name": "Gula Iro", "iso_1_code": null, "iso_3_code": "glj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7349", + "scripts": [], + "own_tokenizer": false }, { "name": "Koke", "iso_1_code": null, "iso_3_code": "kou", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7350", + "scripts": [], + "own_tokenizer": false }, { "name": "Niellim", "iso_1_code": null, "iso_3_code": "nie", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7351", + "scripts": [], + "own_tokenizer": false }, { "name": "Noy", "iso_1_code": null, "iso_3_code": "noy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7352", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunia", "iso_1_code": null, "iso_3_code": "tug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7353", + "scripts": [], + "own_tokenizer": false }, { "name": "Zan Gula", "iso_1_code": null, "iso_3_code": "zna", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7344", + "scripts": [], + "own_tokenizer": false }, { "name": "Day", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Day", "iso_1_code": null, "iso_3_code": "dai", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7356", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7355", + "scripts": [], + "own_tokenizer": false }, { "name": "Kim", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Besme", "iso_1_code": null, "iso_3_code": "bes", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7358", + "scripts": [], + "own_tokenizer": false }, { "name": "Goundo", "iso_1_code": null, "iso_3_code": "goy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7359", + "scripts": [], + "own_tokenizer": false }, { "name": "Kim", "iso_1_code": null, "iso_3_code": "kia", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7360", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7357", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern Mbum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kare", "iso_1_code": null, "iso_3_code": "kbn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7364", + "scripts": [], + "own_tokenizer": false }, { "name": "Karang", "iso_1_code": null, "iso_3_code": "kzr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7365", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzakambay", "iso_1_code": null, "iso_3_code": "nzy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7366", + "scripts": [], + "own_tokenizer": false }, { "name": "Pana", "iso_1_code": null, "iso_3_code": "pnz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7367", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7363", + "scripts": [], + "own_tokenizer": false }, { "name": "Koh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kuo", "iso_1_code": null, "iso_3_code": "xuo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "7369", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "7368", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7362", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dama-Galke", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dama", "iso_1_code": null, "iso_3_code": "dmm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7372", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndai", "iso_1_code": null, "iso_3_code": "gke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7373", + "scripts": [], + "own_tokenizer": false }, { "name": "Mono", "iso_1_code": null, "iso_3_code": "mru", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7374", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7371", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupuri-Mambai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mambai", "iso_1_code": null, "iso_3_code": "mcs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7376", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundang", "iso_1_code": null, "iso_3_code": "mua", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7377", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tupuri", "iso_1_code": null, "iso_3_code": "tui", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "7378", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "7375", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7370", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbum", "iso_1_code": null, "iso_3_code": "mdd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7380", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7379", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dek", "iso_1_code": null, "iso_3_code": "dek", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7382", + "scripts": [], + "own_tokenizer": false }, { "name": "Pam", "iso_1_code": null, "iso_3_code": "pmn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7383", + "scripts": [], + "own_tokenizer": false }, { "name": "To", "iso_1_code": null, "iso_3_code": "toz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7384", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7361", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7343", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Oblo", "iso_1_code": null, "iso_3_code": "obl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7386", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7385", + "scripts": [], + "own_tokenizer": false }, { "name": "Waja-Jen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kyak", "iso_1_code": null, "iso_3_code": "bka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7389", + "scripts": [], + "own_tokenizer": false }, { "name": "Burak", "iso_1_code": null, "iso_3_code": "bys", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7390", + "scripts": [], + "own_tokenizer": false }, { "name": "M\u00e1ghd\u00ec", "iso_1_code": null, "iso_3_code": "gmd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7391", + "scripts": [], + "own_tokenizer": false }, { "name": "Moo", "iso_1_code": null, "iso_3_code": "gwg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7392", + "scripts": [], + "own_tokenizer": false }, { "name": "Dza", "iso_1_code": null, "iso_3_code": "jen", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7393", + "scripts": [], + "own_tokenizer": false }, { "name": "Leelau", "iso_1_code": null, "iso_3_code": "ldk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7394", + "scripts": [], + "own_tokenizer": false }, { "name": "Loo", "iso_1_code": null, "iso_3_code": "ldo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7395", + "scripts": [], + "own_tokenizer": false }, { "name": "Mingang Doso", "iso_1_code": null, "iso_3_code": "mko", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7396", + "scripts": [], + "own_tokenizer": false }, { "name": "Mak", "iso_1_code": null, "iso_3_code": "pbl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7397", + "scripts": [], + "own_tokenizer": false }, { "name": "Tha", "iso_1_code": null, "iso_3_code": "thy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7388", + "scripts": [], + "own_tokenizer": false }, { "name": "Longuda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Longuda", "iso_1_code": null, "iso_3_code": "lnu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7400", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7399", + "scripts": [], + "own_tokenizer": false }, { "name": "Waja", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Awak", "iso_1_code": null, "iso_3_code": "awo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7403", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamo", "iso_1_code": null, "iso_3_code": "kcq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7404", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7402", + "scripts": [], + "own_tokenizer": false }, { "name": "Cham-Mona", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cham", "iso_1_code": null, "iso_3_code": "cfa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7406", + "scripts": [], + "own_tokenizer": false }, { "name": "Tso", "iso_1_code": null, "iso_3_code": "ldp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7407", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7405", + "scripts": [], + "own_tokenizer": false }, { "name": "Dadiya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dadiya", "iso_1_code": null, "iso_3_code": "dbd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7409", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7408", + "scripts": [], + "own_tokenizer": false }, { "name": "Tula", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangwinji", "iso_1_code": null, "iso_3_code": "bsj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7411", + "scripts": [], + "own_tokenizer": false }, { "name": "Tula", "iso_1_code": null, "iso_3_code": "tul", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7412", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Waja", "iso_1_code": null, "iso_3_code": "wja", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7413", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7410", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7401", + "scripts": [], + "own_tokenizer": false }, { "name": "Yungur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Libo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kaan", "iso_1_code": null, "iso_3_code": "ldl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7416", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7415", + "scripts": [], + "own_tokenizer": false }, { "name": "Mboi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mboi", "iso_1_code": null, "iso_3_code": "moi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7417", + "scripts": [], + "own_tokenizer": false }, { "name": "Yungur-Roba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lala-Roba", "iso_1_code": null, "iso_3_code": "lla", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7420", + "scripts": [], + "own_tokenizer": false }, { "name": "Voro", "iso_1_code": null, "iso_3_code": "vor", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7421", + "scripts": [], + "own_tokenizer": false }, { "name": "Bena", "iso_1_code": null, "iso_3_code": "yun", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7422", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7414", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7387", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7288", + "scripts": [], + "own_tokenizer": false }, { "name": "Ubangi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central Core", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Bambari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Bambari", "iso_1_code": null, "iso_3_code": "liy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7428", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7427", + "scripts": [], + "own_tokenizer": false }, { "name": "Banda-Banda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Banda", "iso_1_code": null, "iso_3_code": "bpd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7429", + "scripts": [], + "own_tokenizer": false }, { "name": "Banda-Mbres", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Mbr\u00e8s", "iso_1_code": null, "iso_3_code": "bqk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7431", + "scripts": [], + "own_tokenizer": false }, { "name": "Banda-Ndele", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Nd\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "bfl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7434", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7433", + "scripts": [], + "own_tokenizer": false }, { "name": "Mid-Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda, Mid-Southern", "iso_1_code": null, "iso_3_code": "bjo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7436", + "scripts": [], + "own_tokenizer": false }, { "name": "Gobu", "iso_1_code": null, "iso_3_code": "gox", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7437", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpagua", "iso_1_code": null, "iso_3_code": "kuw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7438", + "scripts": [], + "own_tokenizer": false }, { "name": "Mono", "iso_1_code": null, "iso_3_code": "mnh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7439", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngundu", "iso_1_code": null, "iso_3_code": "nue", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7435", + "scripts": [], + "own_tokenizer": false }, { "name": "Togbo-Vara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda, Togbo-Vara", "iso_1_code": null, "iso_3_code": "tor", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "7442", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7441", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7426", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda-Yangere", "iso_1_code": null, "iso_3_code": "yaj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7444", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7443", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7425", + "scripts": [], + "own_tokenizer": false }, { "name": "South Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Langbashe", "iso_1_code": null, "iso_3_code": "lna", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7446", + "scripts": [], + "own_tokenizer": false }, { "name": "Banda, South Central", "iso_1_code": null, "iso_3_code": "lnl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7447", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7445", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbandja", "iso_1_code": null, "iso_3_code": "zmz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7449", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7448", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngbundu", "iso_1_code": null, "iso_3_code": "nuu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7451", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7450", + "scripts": [], + "own_tokenizer": false }, { "name": "West Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Banda, West Central", "iso_1_code": null, "iso_3_code": "bbp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7453", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7452", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7424", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya-Manza-Ngbaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Suma", "iso_1_code": null, "iso_3_code": "sqm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7455", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bhogoto", "iso_1_code": null, "iso_3_code": "bdt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7457", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya-Bossangoa", "iso_1_code": null, "iso_3_code": "gbp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7458", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya-Bozoum", "iso_1_code": null, "iso_3_code": "gbq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7459", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbanu", "iso_1_code": null, "iso_3_code": "gbv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7460", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7456", + "scripts": [], + "own_tokenizer": false }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ali", "iso_1_code": null, "iso_3_code": "aiy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7462", + "scripts": [], + "own_tokenizer": false }, { "name": "Bofi", "iso_1_code": null, "iso_3_code": "bff", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7463", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandja", "iso_1_code": null, "iso_3_code": "mzv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7464", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbaka", "iso_1_code": null, "iso_3_code": "nga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7465", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbaka Manza", "iso_1_code": null, "iso_3_code": "ngg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7466", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7461", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gbaya, Northwest", "iso_1_code": null, "iso_3_code": "gya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7468", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7467", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangandu", "iso_1_code": null, "iso_3_code": "bgf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7470", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya-Mbodomo", "iso_1_code": null, "iso_3_code": "gmm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7471", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya, Southwest", "iso_1_code": null, "iso_3_code": "gso", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7472", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngombe", "iso_1_code": null, "iso_3_code": "nmj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7469", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7454", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbandi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dendi", "iso_1_code": null, "iso_3_code": "deq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7475", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbayi", "iso_1_code": null, "iso_3_code": "gyg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7476", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbangi", "iso_1_code": null, "iso_3_code": "mgn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7477", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbandi, Southern", "iso_1_code": null, "iso_3_code": "nbw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7478", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbandi, Northern", "iso_1_code": null, "iso_3_code": "ngb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7479", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yakoma", "iso_1_code": null, "iso_3_code": "yky", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7480", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7474", + "scripts": [], + "own_tokenizer": false }, { "name": "Sere-Ngbaka-Mba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngbaka-Mba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dongo", "iso_1_code": null, "iso_3_code": "doo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7484", + "scripts": [], + "own_tokenizer": false }, { "name": "Mba", "iso_1_code": null, "iso_3_code": "mfc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7485", + "scripts": [], + "own_tokenizer": false }, { "name": "Ma", "iso_1_code": null, "iso_3_code": "msj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7486", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndunga", "iso_1_code": null, "iso_3_code": "ndt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7487", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7483", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mayogo-Bangba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bangba", "iso_1_code": null, "iso_3_code": "bbe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7491", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayogo", "iso_1_code": null, "iso_3_code": "mdm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7492", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7490", + "scripts": [], + "own_tokenizer": false }, { "name": "Mundu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "M\u00fcnd\u00fc", "iso_1_code": null, "iso_3_code": "muh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7494", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7489", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baka-Gundi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baka", "iso_1_code": null, "iso_3_code": "bkc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7497", + "scripts": [], + "own_tokenizer": false }, { "name": "Limassa", "iso_1_code": null, "iso_3_code": "bme", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7498", + "scripts": [], + "own_tokenizer": false }, { "name": "Gundi", "iso_1_code": null, "iso_3_code": "gdi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7499", + "scripts": [], + "own_tokenizer": false }, { "name": "Ganzi", "iso_1_code": null, "iso_3_code": "gnz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7500", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7496", + "scripts": [], + "own_tokenizer": false }, { "name": "Bwaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gilima", "iso_1_code": null, "iso_3_code": "gix", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7502", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngbaka Ma\u2019bo", "iso_1_code": null, "iso_3_code": "nbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7501", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbanzili", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buraka", "iso_1_code": null, "iso_3_code": "bkg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7505", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbanziri", "iso_1_code": null, "iso_3_code": "gbg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7506", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7504", + "scripts": [], + "own_tokenizer": false }, { "name": "Monzombo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kpala", "iso_1_code": null, "iso_3_code": "kpl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7508", + "scripts": [], + "own_tokenizer": false }, { "name": "Monzombo", "iso_1_code": null, "iso_3_code": "moj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7509", + "scripts": [], + "own_tokenizer": false }, { "name": "Yango", "iso_1_code": null, "iso_3_code": "yng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7510", + "scripts": [], + "own_tokenizer": false } - ] - } - ] + ], + "node_i": "7507", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7495", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7488", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7482", + "scripts": [], + "own_tokenizer": false }, { "name": "Sere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Feroge-Mangaya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Feroge", "iso_1_code": null, "iso_3_code": "fer", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7513", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangayat", "iso_1_code": null, "iso_3_code": "myj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7514", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7512", + "scripts": [], + "own_tokenizer": false }, { "name": "Indri-Togoyo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Indri", "iso_1_code": null, "iso_3_code": "idr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7516", + "scripts": [], + "own_tokenizer": false }, { "name": "Togoyo", "iso_1_code": null, "iso_3_code": "tgy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7515", + "scripts": [], + "own_tokenizer": false }, { "name": "Sere-Bviri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bai-Viri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bai", "iso_1_code": null, "iso_3_code": "bdj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7520", + "scripts": [], + "own_tokenizer": false }, { "name": "Belanda Viri", "iso_1_code": null, "iso_3_code": "bvi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7519", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndogo-Sere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ndogo", "iso_1_code": null, "iso_3_code": "ndz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7523", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sere", "iso_1_code": null, "iso_3_code": "swf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7524", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagbu", "iso_1_code": null, "iso_3_code": "tbm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "7525", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7522", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7518", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7511", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7481", + "scripts": [], + "own_tokenizer": false }, { "name": "Zande", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barambo-Pambia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Barambu", "iso_1_code": null, "iso_3_code": "brm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7528", + "scripts": [], + "own_tokenizer": false }, { "name": "Pambia", "iso_1_code": null, "iso_3_code": "pmb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7529", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7527", + "scripts": [], + "own_tokenizer": false }, { "name": "Zande-Nzakara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Geme", "iso_1_code": null, "iso_3_code": "geq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7531", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpatili", "iso_1_code": null, "iso_3_code": "kym", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7532", + "scripts": [], + "own_tokenizer": false }, { "name": "Nzakara", "iso_1_code": null, "iso_3_code": "nzk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7533", + "scripts": [], + "own_tokenizer": false }, { "name": "Zande", "iso_1_code": null, "iso_3_code": "zne", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7534", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7530", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7526", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7287", + "scripts": [], + "own_tokenizer": false }, { "name": "Gur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bariba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Baatonum", "iso_1_code": null, "iso_3_code": "bba", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7537", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7536", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bwamu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bomu", "iso_1_code": null, "iso_3_code": "bmq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7541", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Buamu", "iso_1_code": null, "iso_3_code": "box", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7542", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bwamu, L\u00e1\u00e1 L\u00e1\u00e1", "iso_1_code": null, "iso_3_code": "bwj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7543", + "scripts": [], + "own_tokenizer": false }, { "name": "Bwamu, Cwi", "iso_1_code": null, "iso_3_code": "bwy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7544", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7540", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurumfe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koromf\u00e9", "iso_1_code": null, "iso_3_code": "kfz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7546", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7545", + "scripts": [], + "own_tokenizer": false }, { "name": "Oti-Volta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buli-Koma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Buli", "iso_1_code": null, "iso_3_code": "bwu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7549", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Konni", "iso_1_code": null, "iso_3_code": "kma", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7550", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7548", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Biali", "iso_1_code": null, "iso_3_code": "beh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7552", + "scripts": [], + "own_tokenizer": false }, { "name": "Mbelime", "iso_1_code": null, "iso_3_code": "mql", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7553", + "scripts": [], + "own_tokenizer": false }, { "name": "Ditammari", "iso_1_code": null, "iso_3_code": "tbz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7554", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Waama", "iso_1_code": null, "iso_3_code": "wwa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7555", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7551", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ngangam", "iso_1_code": null, "iso_3_code": "gng", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7557", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gourmanch\u00e9ma", "iso_1_code": null, "iso_3_code": "gux", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7558", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nateni", "iso_1_code": null, "iso_3_code": "ntm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7559", + "scripts": [], + "own_tokenizer": false }, { "name": "Miyobe", "iso_1_code": null, "iso_3_code": "soy", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7560", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Konkomba", "iso_1_code": null, "iso_3_code": "xon", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7561", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bimoba", "iso_1_code": null, "iso_3_code": "bim", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7563", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moba", "iso_1_code": null, "iso_3_code": "mfq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7564", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7562", + "scripts": [], + "own_tokenizer": false }, { "name": "Ntcham", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Akaselem", "iso_1_code": null, "iso_3_code": "aks", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7566", + "scripts": [], + "own_tokenizer": false }, { "name": "Ntcham", "iso_1_code": null, "iso_3_code": "bud", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] - } - ] + "children": [], + "node_i": "7567", + "scripts": [ + "Latn" + ], + "own_tokenizer": false + } + ], + "node_i": "7565", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7556", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nootre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Notre", "iso_1_code": null, "iso_3_code": "bly", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7570", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7569", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Farefare", "iso_1_code": null, "iso_3_code": "gur", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7572", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moore", "iso_1_code": null, "iso_3_code": "mos", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7573", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Safaliba", "iso_1_code": null, "iso_3_code": "saf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7574", + "scripts": [], + "own_tokenizer": false }, { "name": "Wali", "iso_1_code": null, "iso_3_code": "wlx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7575", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dagaari-Birifor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Birifor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Birifor, Malba", "iso_1_code": null, "iso_3_code": "bfo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7578", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Birifor, Southern", "iso_1_code": null, "iso_3_code": "biv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7579", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7577", + "scripts": [], + "own_tokenizer": false }, { "name": "Dagaari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dagaare, Southern", "iso_1_code": null, "iso_3_code": "dga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7581", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dagaari Dioula", "iso_1_code": null, "iso_3_code": "dgd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7582", + "scripts": [], + "own_tokenizer": false }, { "name": "Dagara, Northern", "iso_1_code": null, "iso_3_code": "dgi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7583", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7580", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7576", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7571", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dagbani", "iso_1_code": null, "iso_3_code": "dag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7585", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hanga", "iso_1_code": null, "iso_3_code": "hag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7586", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kamara", "iso_1_code": null, "iso_3_code": "jmr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7587", + "scripts": [], + "own_tokenizer": false }, { "name": "Kusaal", "iso_1_code": null, "iso_3_code": "kus", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7588", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mampruli", "iso_1_code": null, "iso_3_code": "maw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7589", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kantosi", "iso_1_code": null, "iso_3_code": "xkt", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "7590", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7584", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7568", + "scripts": [], + "own_tokenizer": false }, { "name": "Yom-Nawdm", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nawdm", "iso_1_code": null, "iso_3_code": "nmz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7592", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yom", "iso_1_code": null, "iso_3_code": "pil", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7593", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7547", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7539", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dogoso-Khe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dogoso", "iso_1_code": null, "iso_3_code": "dgs", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7596", + "scripts": [], + "own_tokenizer": false }, { "name": "Khe", "iso_1_code": null, "iso_3_code": "kqg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7597", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7595", + "scripts": [], + "own_tokenizer": false }, { "name": "Dyan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dyan", "iso_1_code": null, "iso_3_code": "dya", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7599", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7598", + "scripts": [], + "own_tokenizer": false }, { "name": "Gan-Dogose", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dogos\u00e9", "iso_1_code": null, "iso_3_code": "dos", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7601", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaansa", "iso_1_code": null, "iso_3_code": "gna", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7602", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Khisa", "iso_1_code": null, "iso_3_code": "kqm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7603", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7600", + "scripts": [], + "own_tokenizer": false }, { "name": "Grusi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bago-Kusuntu", "iso_1_code": null, "iso_3_code": "bqg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7606", + "scripts": [], + "own_tokenizer": false }, { "name": "Chala", "iso_1_code": null, "iso_3_code": "cll", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7607", + "scripts": [], + "own_tokenizer": false }, { "name": "Lukpa", "iso_1_code": null, "iso_3_code": "dop", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7608", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kabiy\u00e8", "iso_1_code": null, "iso_3_code": "kbp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7609", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tem", "iso_1_code": null, "iso_3_code": "kdh", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7610", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lama", "iso_1_code": null, "iso_3_code": "las", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7611", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Delo", "iso_1_code": null, "iso_3_code": "ntr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7612", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7605", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kalams\u00e9", "iso_1_code": null, "iso_3_code": "knz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7614", + "scripts": [], + "own_tokenizer": false }, { "name": "Ly\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "lee", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7615", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nuni, Southern", "iso_1_code": null, "iso_3_code": "nnw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7616", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nuni, Northern", "iso_1_code": null, "iso_3_code": "nuv", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7617", + "scripts": [], + "own_tokenizer": false }, { "name": "Pana", "iso_1_code": null, "iso_3_code": "pnq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7618", + "scripts": [], + "own_tokenizer": false }, { "name": "Kasem", "iso_1_code": null, "iso_3_code": "xsm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7619", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7613", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Chakali", "iso_1_code": null, "iso_3_code": "cli", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7621", + "scripts": [], + "own_tokenizer": false }, { "name": "Winy\u00e9", "iso_1_code": null, "iso_3_code": "kst", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7622", + "scripts": [], + "own_tokenizer": false }, { "name": "Deg", "iso_1_code": null, "iso_3_code": "mzw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7623", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Phuie", "iso_1_code": null, "iso_3_code": "pug", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7624", + "scripts": [], + "own_tokenizer": false }, { "name": "Paasaal", "iso_1_code": null, "iso_3_code": "sig", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7625", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sisaala, Tumulung", "iso_1_code": null, "iso_3_code": "sil", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7626", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sissala", "iso_1_code": null, "iso_3_code": "sld", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7627", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sisaala, Western", "iso_1_code": null, "iso_3_code": "ssl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7628", + "scripts": [], + "own_tokenizer": false }, { "name": "Tampulma", "iso_1_code": null, "iso_3_code": "tpm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7629", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vagla", "iso_1_code": null, "iso_3_code": "vag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7630", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7620", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7604", + "scripts": [], + "own_tokenizer": false }, { "name": "Kirma-Tyurama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Cerma", "iso_1_code": null, "iso_3_code": "cme", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7632", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Turka", "iso_1_code": null, "iso_3_code": "tuz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7633", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7631", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7594", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7538", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulango", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kulango, Bondoukou", "iso_1_code": null, "iso_3_code": "kzc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7635", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulango, Bouna", "iso_1_code": null, "iso_3_code": "nku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7636", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7634", + "scripts": [], + "own_tokenizer": false }, { "name": "Lobi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lobi", "iso_1_code": null, "iso_3_code": "lob", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7638", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7637", + "scripts": [], + "own_tokenizer": false }, { "name": "Senufo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karaboro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Karaboro, Western", "iso_1_code": null, "iso_3_code": "kza", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7641", + "scripts": [], + "own_tokenizer": false }, { "name": "Karaboro, Eastern", "iso_1_code": null, "iso_3_code": "xrb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7642", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7640", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpalaga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "S\u00e9noufo, Palaka", "iso_1_code": null, "iso_3_code": "plr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7644", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7643", + "scripts": [], + "own_tokenizer": false }, { "name": "Nafaanra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nafaanra", "iso_1_code": null, "iso_3_code": "nfr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7646", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7645", + "scripts": [], + "own_tokenizer": false }, { "name": "Senari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "S\u00e9noufo, Cebaara", "iso_1_code": null, "iso_3_code": "sef", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7648", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Senara", "iso_1_code": null, "iso_3_code": "seq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7649", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Syenara", "iso_1_code": null, "iso_3_code": "shz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7650", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7647", + "scripts": [], + "own_tokenizer": false }, { "name": "Suppire-Mamara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "S\u00e9noufo, Mamara", "iso_1_code": null, "iso_3_code": "myk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7652", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Shempire", "iso_1_code": null, "iso_3_code": "seb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7653", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Nanerig\u00e9", "iso_1_code": null, "iso_3_code": "sen", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7654", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, S\u00ecc\u00ect\u00e9", "iso_1_code": null, "iso_3_code": "sep", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7655", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Supyire", "iso_1_code": null, "iso_3_code": "spp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7656", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7651", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagwana-Djimini", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "S\u00e9noufo, Djimini", "iso_1_code": null, "iso_3_code": "dyi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7658", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Nyarafolo", "iso_1_code": null, "iso_3_code": "sev", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7659", + "scripts": [], + "own_tokenizer": false }, { "name": "S\u00e9noufo, Tagwana", "iso_1_code": null, "iso_3_code": "tgw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7660", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7657", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7639", + "scripts": [], + "own_tokenizer": false }, { "name": "Teen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Loma", "iso_1_code": null, "iso_3_code": "loi", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7662", + "scripts": [], + "own_tokenizer": false }, { "name": "T\u00e9\u00e9n", "iso_1_code": null, "iso_3_code": "lor", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7663", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7661", + "scripts": [], + "own_tokenizer": false }, { "name": "Tiefo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ti\u00e9fo", "iso_1_code": null, "iso_3_code": "tiq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7665", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7664", + "scripts": [], + "own_tokenizer": false }, { "name": "Tusia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Toussian, Northern", "iso_1_code": null, "iso_3_code": "tsp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7667", + "scripts": [], + "own_tokenizer": false }, { "name": "Toussian, Southern", "iso_1_code": null, "iso_3_code": "wib", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7668", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7666", + "scripts": [], + "own_tokenizer": false }, { "name": "Viemo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Viemo", "iso_1_code": null, "iso_3_code": "vig", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7670", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7669", + "scripts": [], + "own_tokenizer": false }, { "name": "Wara-Natioro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Natioro", "iso_1_code": null, "iso_3_code": "nti", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7672", + "scripts": [], + "own_tokenizer": false }, { "name": "Paleni", "iso_1_code": null, "iso_3_code": "pnl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7673", + "scripts": [], + "own_tokenizer": false }, { "name": "Wara", "iso_1_code": null, "iso_3_code": "wbf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7674", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7671", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7535", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7286", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5448", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5321", + "scripts": [], + "own_tokenizer": false }, { "name": "Kordofanian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Heiban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ko", "iso_1_code": null, "iso_3_code": "fuj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7678", + "scripts": [], + "own_tokenizer": false }, { "name": "Warnang", "iso_1_code": null, "iso_3_code": "wrn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7679", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7677", + "scripts": [], + "own_tokenizer": false }, { "name": "West-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ebang-Logol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ebang-Laru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Heiban", "iso_1_code": null, "iso_3_code": "hbn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7684", + "scripts": [], + "own_tokenizer": false }, { "name": "Laro", "iso_1_code": null, "iso_3_code": "lro", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7685", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7683", + "scripts": [], + "own_tokenizer": false }, { "name": "Logol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Logol", "iso_1_code": null, "iso_3_code": "lof", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7687", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7686", + "scripts": [], + "own_tokenizer": false }, { "name": "Utoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Otoro", "iso_1_code": null, "iso_3_code": "otr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7689", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7688", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7682", + "scripts": [], + "own_tokenizer": false }, { "name": "Rere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Koalib", "iso_1_code": null, "iso_3_code": "kib", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7691", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7690", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7681", + "scripts": [], + "own_tokenizer": false }, { "name": "Shirumba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shwai", "iso_1_code": null, "iso_3_code": "shw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7693", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7692", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Moro", "iso_1_code": null, "iso_3_code": "mor", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7695", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tira", "iso_1_code": null, "iso_3_code": "tic", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7696", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7694", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7680", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7676", + "scripts": [], + "own_tokenizer": false }, { "name": "Katla", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Katla", "iso_1_code": null, "iso_3_code": "kcr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7698", + "scripts": [], + "own_tokenizer": false }, { "name": "Tima", "iso_1_code": null, "iso_3_code": "tms", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7699", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7697", + "scripts": [], + "own_tokenizer": false }, { "name": "Rashad", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Tegali", "iso_1_code": null, "iso_3_code": "ras", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7701", + "scripts": [], + "own_tokenizer": false }, { "name": "Tagoi", "iso_1_code": null, "iso_3_code": "tag", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7702", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7700", + "scripts": [], + "own_tokenizer": false }, { "name": "Talodi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Talodi Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jomang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Talodi", "iso_1_code": null, "iso_3_code": "tlo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7706", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7705", + "scripts": [], + "own_tokenizer": false }, { "name": "Nding", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nding", "iso_1_code": null, "iso_3_code": "eli", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7708", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7707", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngile-Dengebu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dagik", "iso_1_code": null, "iso_3_code": "dec", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7710", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngile", "iso_1_code": null, "iso_3_code": "jle", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7711", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7709", + "scripts": [], + "own_tokenizer": false }, { "name": "Tocho", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Acheron", "iso_1_code": null, "iso_3_code": "acz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7713", + "scripts": [], + "own_tokenizer": false }, { "name": "Lumun", "iso_1_code": null, "iso_3_code": "lmd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7714", + "scripts": [], + "own_tokenizer": false }, { "name": "Tocho", "iso_1_code": null, "iso_3_code": "taz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7715", + "scripts": [], + "own_tokenizer": false }, { "name": "Torona", "iso_1_code": null, "iso_3_code": "tqr", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7716", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7712", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7704", + "scripts": [], + "own_tokenizer": false }, { "name": "Tegem", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Lafofa", "iso_1_code": null, "iso_3_code": "laf", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7718", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7717", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7703", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7675", + "scripts": [], + "own_tokenizer": false }, { "name": "Mande", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bissa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bisa", "iso_1_code": null, "iso_3_code": "bib", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7723", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7722", + "scripts": [], + "own_tokenizer": false }, { "name": "Busa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boko", "iso_1_code": null, "iso_3_code": "bqc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7725", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bis\u00e3", "iso_1_code": null, "iso_3_code": "bqp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7726", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bokobaru", "iso_1_code": null, "iso_3_code": "bus", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7727", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7724", + "scripts": [], + "own_tokenizer": false }, { "name": "Kyanga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Shanga", "iso_1_code": null, "iso_3_code": "sho", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7729", + "scripts": [], + "own_tokenizer": false }, { "name": "Kyanga", "iso_1_code": null, "iso_3_code": "tye", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7730", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7728", + "scripts": [], + "own_tokenizer": false }, { "name": "Samo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Samo, Southern", "iso_1_code": null, "iso_3_code": "sbd", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7732", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Samo, Matya", "iso_1_code": null, "iso_3_code": "stj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7733", + "scripts": [], + "own_tokenizer": false }, { "name": "Samo, Maya", "iso_1_code": null, "iso_3_code": "sym", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7734", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7731", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7721", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gbin", "iso_1_code": null, "iso_3_code": "xgb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7736", + "scripts": [], + "own_tokenizer": false }, { "name": "Guro-Tura", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Guro-Yaoure", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Guro", "iso_1_code": null, "iso_3_code": "goa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7739", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaour\u00e9", "iso_1_code": null, "iso_3_code": "yre", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7740", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7738", + "scripts": [], + "own_tokenizer": false }, { "name": "Tura-Dan-Mano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Maan", "iso_1_code": null, "iso_3_code": "mev", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7743", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7742", + "scripts": [], + "own_tokenizer": false }, { "name": "Tura-Dan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Dan", "iso_1_code": null, "iso_3_code": "dnj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7745", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Goo", "iso_1_code": null, "iso_3_code": "gov", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7746", + "scripts": [], + "own_tokenizer": false }, { "name": "Kla-Dan", "iso_1_code": null, "iso_3_code": "lda", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7747", + "scripts": [], + "own_tokenizer": false }, { "name": "Toura", "iso_1_code": null, "iso_3_code": "neb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7748", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7744", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7741", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7737", + "scripts": [], + "own_tokenizer": false }, { "name": "Nwa-Ben", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ben-Gban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Gban", "iso_1_code": null, "iso_3_code": "ggu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7751", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngen", "iso_1_code": null, "iso_3_code": "gnj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7752", + "scripts": [], + "own_tokenizer": false }, { "name": "Beng", "iso_1_code": null, "iso_3_code": "nhb", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7753", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7750", + "scripts": [], + "own_tokenizer": false }, { "name": "Wan-Mwan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mwan", "iso_1_code": null, "iso_3_code": "moa", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7755", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wan", "iso_1_code": null, "iso_3_code": "wan", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7756", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7749", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7735", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7720", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central-Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manding-Jogo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jogo-Jeri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jeri Kuo", "iso_1_code": null, "iso_3_code": "jek", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7762", + "scripts": [], + "own_tokenizer": false }, { "name": "Jeri-Jalkuna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Jalkunan", "iso_1_code": null, "iso_3_code": "bxl", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7764", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7763", + "scripts": [], + "own_tokenizer": false }, { "name": "Jogo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Ligbi", "iso_1_code": null, "iso_3_code": "lig", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7766", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonjon", "iso_1_code": null, "iso_3_code": "tjn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7767", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7765", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7761", + "scripts": [], + "own_tokenizer": false }, { "name": "Manding-Vai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manding-Mokole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Manding", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bolon", "iso_1_code": null, "iso_3_code": "bof", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7771", + "scripts": [], + "own_tokenizer": false }, { "name": "Jahanka", "iso_1_code": null, "iso_3_code": "jad", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7772", + "scripts": [], + "own_tokenizer": false }, { "name": "Sininkere", "iso_1_code": null, "iso_3_code": "skq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7773", + "scripts": [], + "own_tokenizer": false }, { "name": "Manding-East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Marka-Dafin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Marka", "iso_1_code": null, "iso_3_code": "rkm", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7776", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7775", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeastern Manding", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bamana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bamanankan", "iso_1_code": "bm", "iso_3_code": "bam", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7779", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jula", "iso_1_code": null, "iso_3_code": "dyu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7780", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7778", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7777", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern Manding", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Maninkakan, Eastern", "iso_1_code": null, "iso_3_code": "emk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7782", + "scripts": [], + "own_tokenizer": false }, { "name": "Konyanka", "iso_1_code": null, "iso_3_code": "mku", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7783", + "scripts": [], + "own_tokenizer": false }, { "name": "Maninka, Sankaran", "iso_1_code": null, "iso_3_code": "msc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7784", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Manya", "iso_1_code": null, "iso_3_code": "mzj", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7785", + "scripts": [], + "own_tokenizer": false }, { "name": "Maninka-Mori", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Wojenaka", "iso_1_code": null, "iso_3_code": "jod", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7787", + "scripts": [], + "own_tokenizer": false }, { "name": "Worodougou", "iso_1_code": null, "iso_3_code": "jud", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7788", + "scripts": [], + "own_tokenizer": false }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "kfo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7789", + "scripts": [], + "own_tokenizer": false }, { "name": "Koyaga", "iso_1_code": null, "iso_3_code": "kga", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7790", + "scripts": [], + "own_tokenizer": false }, { "name": "Mahou", "iso_1_code": null, "iso_3_code": "mxx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7791", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7786", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7781", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7774", + "scripts": [], + "own_tokenizer": false }, { "name": "Manding-West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Xaasongaxango", "iso_1_code": null, "iso_3_code": "kao", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7793", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maninkakan, Western", "iso_1_code": null, "iso_3_code": "mlq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7794", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandinka", "iso_1_code": null, "iso_3_code": "mnk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7795", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maninkakan, Kita", "iso_1_code": null, "iso_3_code": "mwk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7796", + "scripts": [], + "own_tokenizer": false }, { "name": "Kagoro", "iso_1_code": null, "iso_3_code": "xkg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] - } - ] + "tokenizers": {}, + "children": [], + "node_i": "7797", + "scripts": [], + "own_tokenizer": false + } + ], + "node_i": "7792", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7770", + "scripts": [], + "own_tokenizer": false }, { "name": "Mokole", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kakabe", "iso_1_code": null, "iso_3_code": "kke", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7799", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuranko", "iso_1_code": null, "iso_3_code": "knk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7800", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "llc", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7801", + "scripts": [], + "own_tokenizer": false }, { "name": "Mogofin", "iso_1_code": null, "iso_3_code": "mfg", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } + }, + "children": [], + "node_i": "7802", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7798", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7769", + "scripts": [], + "own_tokenizer": false }, { "name": "Vai-Kono", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kono", "iso_1_code": null, "iso_3_code": "kno", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7804", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vai", "iso_1_code": null, "iso_3_code": "vai", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7805", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7803", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7768", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7760", + "scripts": [], + "own_tokenizer": false }, { "name": "Susu-Yalunka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Susu", "iso_1_code": null, "iso_3_code": "sus", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7807", + "scripts": [ + "Arab", + "Latn" + ], + "own_tokenizer": false }, { "name": "Yalunka", "iso_1_code": null, "iso_3_code": "yal", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7808", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7806", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7759", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kpelle", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kpelle, Guinea", "iso_1_code": null, "iso_3_code": "gkp", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7811", + "scripts": [], + "own_tokenizer": false }, { "name": "Kono", "iso_1_code": null, "iso_3_code": "knu", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7812", + "scripts": [], + "own_tokenizer": false }, { "name": "Kpelle, Liberia", "iso_1_code": null, "iso_3_code": "xpe", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7813", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7810", + "scripts": [], + "own_tokenizer": false }, { "name": "Mende-Loma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Zialo", "iso_1_code": null, "iso_3_code": "zil", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7815", + "scripts": [], + "own_tokenizer": false }, { "name": "Loma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Loma", "iso_1_code": null, "iso_3_code": "lom", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7817", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Toma", "iso_1_code": null, "iso_3_code": "tod", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7818", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7816", + "scripts": [], + "own_tokenizer": false }, { "name": "Mende-Bandi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bandi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bandi", "iso_1_code": null, "iso_3_code": "bza", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7821", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7820", + "scripts": [], + "own_tokenizer": false }, { "name": "Mende-Loko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Loko", "iso_1_code": null, "iso_3_code": "lok", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7823", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mende", "iso_1_code": null, "iso_3_code": "men", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7824", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7822", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7819", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7814", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7809", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7758", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Kpeego", "iso_1_code": null, "iso_3_code": "cpo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7826", + "scripts": [], + "own_tokenizer": false }, { "name": "Samogo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bankagooma", "iso_1_code": null, "iso_3_code": "bxw", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7828", + "scripts": [], + "own_tokenizer": false }, { "name": "Dz\u00f9\u00f9ngoo", "iso_1_code": null, "iso_3_code": "dnn", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7829", + "scripts": [], + "own_tokenizer": false }, { "name": "Duungooma", "iso_1_code": null, "iso_3_code": "dux", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7830", + "scripts": [], + "own_tokenizer": false }, { "name": "Jowulu", "iso_1_code": null, "iso_3_code": "jow", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7831", + "scripts": [], + "own_tokenizer": false }, { "name": "Seenku", "iso_1_code": null, "iso_3_code": "sos", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7832", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7827", + "scripts": [], + "own_tokenizer": false }, { "name": "Soninke-Bobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Konab\u00e9r\u00e9", "iso_1_code": null, "iso_3_code": "bbo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7835", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bobo Madar\u00e9, Southern", "iso_1_code": null, "iso_3_code": "bwq", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "7836", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7834", + "scripts": [], + "own_tokenizer": false }, { "name": "Soninke-Boso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Boso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bozo, Tiemac\u00e8w\u00e8", "iso_1_code": null, "iso_3_code": "boo", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7840", + "scripts": [], + "own_tokenizer": false }, { "name": "Bozo, Tieyaxo", "iso_1_code": null, "iso_3_code": "boz", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7841", + "scripts": [], + "own_tokenizer": false }, { "name": "Bozo, Kelengaxo", "iso_1_code": null, "iso_3_code": "bzx", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7842", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7839", + "scripts": [], + "own_tokenizer": false }, { "name": "Jenaama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Bozo, Jenaama", "iso_1_code": null, "iso_3_code": "bze", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7844", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7843", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7838", + "scripts": [], + "own_tokenizer": false }, { "name": "Soninke", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Soninke", "iso_1_code": null, "iso_3_code": "snk", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7846", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7845", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7837", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7833", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7825", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7757", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7719", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mbre", "iso_1_code": null, "iso_3_code": "mka", - "tokenizer": { - "name": "ganda", - "tokenizer": "SpaCyTokenizer(\"lg\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7848", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7847", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "5320", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Nilo-Saharan.json b/data/Nilo-Saharan.json index 7827167687d1fe0cc35bc560a808d17c24856283..1f3354bd22bfc8c76a2de70a49b64b802792a4f2 100644 --- a/data/Nilo-Saharan.json +++ b/data/Nilo-Saharan.json @@ -2,2885 +2,3686 @@ "name": "Nilo-Saharan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuliak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ik", "iso_1_code": null, "iso_3_code": "ikx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7852", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7851", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngangea-So", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nyang\u2019i", "iso_1_code": null, "iso_3_code": "nyp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7854", + "scripts": [], + "own_tokenizer": false }, { "name": "Soo", "iso_1_code": null, "iso_3_code": "teu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7855", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7853", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7850", + "scripts": [], + "own_tokenizer": false }, { "name": "Saharan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Berti", "iso_1_code": null, "iso_3_code": "byt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7858", + "scripts": [], + "own_tokenizer": false }, { "name": "Zaghawa", "iso_1_code": null, "iso_3_code": "zag", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7859", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7857", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kanuri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kanuri, Bilma", "iso_1_code": null, "iso_3_code": "bms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7862", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanembu", "iso_1_code": null, "iso_3_code": "kbl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7863", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanuri, Manga", "iso_1_code": "kr", "iso_3_code": "kby", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7864", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kanuri, Yerwa", "iso_1_code": "kr", "iso_3_code": "knc", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7865", + "scripts": [ + "Latn", + "Arab" + ], + "own_tokenizer": false }, { "name": "Kanuri, Tumari", "iso_1_code": "kr", "iso_3_code": "krt", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7866", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarjumo", "iso_1_code": null, "iso_3_code": "txj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7867", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7861", + "scripts": [], + "own_tokenizer": false }, { "name": "Tebu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dazaga", "iso_1_code": null, "iso_3_code": "dzg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7869", + "scripts": [], + "own_tokenizer": false }, { "name": "Tedaga", "iso_1_code": null, "iso_3_code": "tuq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7870", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7868", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7860", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7856", + "scripts": [], + "own_tokenizer": false }, { "name": "Satellite-Core", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Core", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "B\u2019aga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Daats\u02bci\u0301in", "iso_1_code": null, "iso_3_code": "dtn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7874", + "scripts": [], + "own_tokenizer": false }, { "name": "Gumuz", "iso_1_code": null, "iso_3_code": "guk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7875", + "scripts": [ + "Ethi" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7873", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Sudanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern (k languages)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nara", "iso_1_code": null, "iso_3_code": "nrb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7879", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7878", + "scripts": [], + "own_tokenizer": false }, { "name": "Nubian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mattokki", "iso_1_code": null, "iso_3_code": "xnz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7882", + "scripts": [], + "own_tokenizer": false }, { "name": "Birked", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Birked", "iso_1_code": null, "iso_3_code": "brk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7884", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7883", + "scripts": [], + "own_tokenizer": false }, { "name": "Dongolawi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andaandi", "iso_1_code": null, "iso_3_code": "dgl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7886", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7885", + "scripts": [], + "own_tokenizer": false }, { "name": "Hill", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kadaru-Ghulfan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ghulfan", "iso_1_code": null, "iso_3_code": "ghl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7889", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadaru", "iso_1_code": null, "iso_3_code": "kdu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7890", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7888", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dilling", "iso_1_code": null, "iso_3_code": "dil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7892", + "scripts": [], + "own_tokenizer": false }, { "name": "Dair", "iso_1_code": null, "iso_3_code": "drb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7893", + "scripts": [], + "own_tokenizer": false }, { "name": "El Hugeirat", "iso_1_code": null, "iso_3_code": "elh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7894", + "scripts": [], + "own_tokenizer": false }, { "name": "Karko", "iso_1_code": null, "iso_3_code": "kko", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7895", + "scripts": [], + "own_tokenizer": false }, { "name": "Wali", "iso_1_code": null, "iso_3_code": "wll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7896", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7891", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7887", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7881", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nobiin", "iso_1_code": null, "iso_3_code": "fia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7898", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7897", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Midob", "iso_1_code": null, "iso_3_code": "mei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7900", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7899", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7880", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyimang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Afitti", "iso_1_code": null, "iso_3_code": "aft", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7902", + "scripts": [], + "own_tokenizer": false }, { "name": "Ama", "iso_1_code": null, "iso_3_code": "nyi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7903", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7901", + "scripts": [], + "own_tokenizer": false }, { "name": "Tama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mararit", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mararit", "iso_1_code": null, "iso_3_code": "mgb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7906", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7905", + "scripts": [], + "own_tokenizer": false }, { "name": "Tama-Sungor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Assangori", "iso_1_code": null, "iso_3_code": "sjg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7908", + "scripts": [], + "own_tokenizer": false }, { "name": "Tama", "iso_1_code": null, "iso_3_code": "tma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7909", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7907", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7904", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7877", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern (n languages)", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Logorik", "iso_1_code": null, "iso_3_code": "liu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7913", + "scripts": [], + "own_tokenizer": false }, { "name": "Shatt", "iso_1_code": null, "iso_3_code": "shj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7914", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7912", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baygo", "iso_1_code": null, "iso_3_code": "byg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7916", + "scripts": [], + "own_tokenizer": false }, { "name": "Daju, Dar Fur", "iso_1_code": null, "iso_3_code": "daj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7917", + "scripts": [], + "own_tokenizer": false }, { "name": "Daju, Dar Sila", "iso_1_code": null, "iso_3_code": "dau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7918", + "scripts": [], + "own_tokenizer": false }, { "name": "Daju, Dar Daju", "iso_1_code": null, "iso_3_code": "djc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7919", + "scripts": [], + "own_tokenizer": false }, { "name": "Njalgulgule", "iso_1_code": null, "iso_3_code": "njl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7920", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7915", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7911", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Jebel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aka-Kelo-Molo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aka", "iso_1_code": null, "iso_3_code": "soh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7923", + "scripts": [], + "own_tokenizer": false }, { "name": "Kelo", "iso_1_code": null, "iso_3_code": "xel", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7924", + "scripts": [], + "own_tokenizer": false }, { "name": "Molo", "iso_1_code": null, "iso_3_code": "zmo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7925", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7922", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gaam", "iso_1_code": null, "iso_3_code": "tbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7927", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7926", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7921", + "scripts": [], + "own_tokenizer": false }, { "name": "Nilotic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bari", "iso_1_code": null, "iso_3_code": "bfa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7931", + "scripts": [], + "own_tokenizer": false }, { "name": "Kakwa", "iso_1_code": null, "iso_3_code": "keo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7932", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mandari", "iso_1_code": null, "iso_3_code": "mqu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7933", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuku", "iso_1_code": null, "iso_3_code": "ukv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7934", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7930", + "scripts": [], + "own_tokenizer": false }, { "name": "Lotuxo-Teso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lotuxo-Maa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lotuxo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dongotono", "iso_1_code": null, "iso_3_code": "ddd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7938", + "scripts": [], + "own_tokenizer": false }, { "name": "Imotong", "iso_1_code": null, "iso_3_code": "imt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7939", + "scripts": [], + "own_tokenizer": false }, { "name": "Lango", "iso_1_code": null, "iso_3_code": "lgo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7940", + "scripts": [], + "own_tokenizer": false }, { "name": "Lokoya", "iso_1_code": null, "iso_3_code": "lky", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7941", + "scripts": [], + "own_tokenizer": false }, { "name": "Otuho", "iso_1_code": null, "iso_3_code": "lot", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7942", + "scripts": [], + "own_tokenizer": false }, { "name": "Lopit", "iso_1_code": null, "iso_3_code": "lpx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7943", + "scripts": [], + "own_tokenizer": false }, { "name": "Logir", "iso_1_code": null, "iso_3_code": "lqr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7944", + "scripts": [], + "own_tokenizer": false }, { "name": "Okolie", "iso_1_code": null, "iso_3_code": "oie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7945", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7937", + "scripts": [], + "own_tokenizer": false }, { "name": "Ongamo-Maa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maasai", "iso_1_code": null, "iso_3_code": "mas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7947", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngasa", "iso_1_code": null, "iso_3_code": "nsg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7948", + "scripts": [], + "own_tokenizer": false }, { "name": "Samburu", "iso_1_code": null, "iso_3_code": "saq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7949", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7946", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7936", + "scripts": [], + "own_tokenizer": false }, { "name": "Teso-Turkana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Teso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ateso", "iso_1_code": null, "iso_3_code": "teo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7952", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7951", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ng\u2019akarimojong", "iso_1_code": null, "iso_3_code": "kdj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7954", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nyangatom", "iso_1_code": null, "iso_3_code": "nnj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7955", + "scripts": [], + "own_tokenizer": false }, { "name": "Toposa", "iso_1_code": null, "iso_3_code": "toq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7956", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkana", "iso_1_code": null, "iso_3_code": "tuv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7957", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7953", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7950", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7935", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7929", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalenjin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Elgon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kupsapiiny", "iso_1_code": null, "iso_3_code": "kpz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7961", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sabaot", "iso_1_code": null, "iso_3_code": "spy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7962", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7960", + "scripts": [], + "own_tokenizer": false }, { "name": "Nandi-Markweta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kipsigis", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kipsigis", "iso_1_code": null, "iso_3_code": "sgc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7965", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7964", + "scripts": [], + "own_tokenizer": false }, { "name": "Markweta", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Markweeta", "iso_1_code": null, "iso_3_code": "enb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7967", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7966", + "scripts": [], + "own_tokenizer": false }, { "name": "Nandi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Keiyo", "iso_1_code": null, "iso_3_code": "eyo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7969", + "scripts": [], + "own_tokenizer": false }, { "name": "Kisankasa", "iso_1_code": null, "iso_3_code": "kqh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7970", + "scripts": [], + "own_tokenizer": false }, { "name": "Nandi", "iso_1_code": null, "iso_3_code": "niq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7971", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Terik", "iso_1_code": null, "iso_3_code": "tec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7972", + "scripts": [], + "own_tokenizer": false }, { "name": "Tugen", "iso_1_code": null, "iso_3_code": "tuy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7973", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7968", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7963", + "scripts": [], + "own_tokenizer": false }, { "name": "Okiek", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Okiek", "iso_1_code": null, "iso_3_code": "oki", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7975", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7974", + "scripts": [], + "own_tokenizer": false }, { "name": "Pokot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "P\u00f6koot", "iso_1_code": null, "iso_3_code": "pko", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7977", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7976", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7959", + "scripts": [], + "own_tokenizer": false }, { "name": "Tatoga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Omotik", "iso_1_code": null, "iso_3_code": "omt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7979", + "scripts": [], + "own_tokenizer": false }, { "name": "Datooga", "iso_1_code": null, "iso_3_code": "tcc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7980", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7978", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7958", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dinka-Nuer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dinka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dinka, South Central", "iso_1_code": null, "iso_3_code": "dib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7984", + "scripts": [], + "own_tokenizer": false }, { "name": "Dinka, Southwestern", "iso_1_code": null, "iso_3_code": "dik", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7985", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dinka, Northeastern", "iso_1_code": null, "iso_3_code": "dip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7986", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dinka, Northwestern", "iso_1_code": null, "iso_3_code": "diw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7987", + "scripts": [], + "own_tokenizer": false }, { "name": "Dinka, Southeastern", "iso_1_code": null, "iso_3_code": "dks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7988", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7983", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuer", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Reel", "iso_1_code": null, "iso_3_code": "atu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7990", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuer", "iso_1_code": null, "iso_3_code": "nus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7991", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7989", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7982", + "scripts": [], + "own_tokenizer": false }, { "name": "Luo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anuak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anuak", "iso_1_code": null, "iso_3_code": "anu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7995", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7994", + "scripts": [], + "own_tokenizer": false }, { "name": "Bor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Belanda Bor", "iso_1_code": null, "iso_3_code": "bxb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7997", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7996", + "scripts": [], + "own_tokenizer": false }, { "name": "Jur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Luwo", "iso_1_code": null, "iso_3_code": "lwo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "7999", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "7998", + "scripts": [], + "own_tokenizer": false }, { "name": "Mabaan-Burun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burun", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burun", "iso_1_code": null, "iso_3_code": "bdi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8002", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8001", + "scripts": [], + "own_tokenizer": false }, { "name": "Mabaan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jumjum", "iso_1_code": null, "iso_3_code": "jum", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8004", + "scripts": [], + "own_tokenizer": false }, { "name": "Mabaan", "iso_1_code": null, "iso_3_code": "mfz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8005", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8003", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8000", + "scripts": [], + "own_tokenizer": false }, { "name": "Shilluk", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Shilluk", "iso_1_code": null, "iso_3_code": "shk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8007", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8006", + "scripts": [], + "own_tokenizer": false }, { "name": "Thuri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Thuri", "iso_1_code": null, "iso_3_code": "thu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8009", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8008", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "P\u00e4ri", "iso_1_code": null, "iso_3_code": "lkr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8011", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8010", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7993", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Adhola", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jopadhola", "iso_1_code": null, "iso_3_code": "adh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8014", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8013", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kumam", "iso_1_code": null, "iso_3_code": "kdi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8016", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8015", + "scripts": [], + "own_tokenizer": false }, { "name": "Luo-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alur-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Thur", "iso_1_code": null, "iso_3_code": "lth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8019", + "scripts": [], + "own_tokenizer": false }, { "name": "Alur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alur", "iso_1_code": null, "iso_3_code": "alz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8021", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8020", + "scripts": [], + "own_tokenizer": false }, { "name": "Lango-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Acholi", "iso_1_code": null, "iso_3_code": "ach", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8023", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lango", "iso_1_code": null, "iso_3_code": "laj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8024", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8022", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8018", + "scripts": [], + "own_tokenizer": false }, { "name": "Luo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dholuo", "iso_1_code": null, "iso_3_code": "luo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8026", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8025", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8017", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8012", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7992", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7981", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7928", + "scripts": [], + "own_tokenizer": false }, { "name": "Surmic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Majang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Majang", "iso_1_code": null, "iso_3_code": "mpe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8030", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8029", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8028", + "scripts": [], + "own_tokenizer": false }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kwegu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kwegu", "iso_1_code": null, "iso_3_code": "xwg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8034", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8033", + "scripts": [], + "own_tokenizer": false }, { "name": "Pastoral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Me\u2019en", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Me\u2019en", "iso_1_code": null, "iso_3_code": "mym", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8037", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8036", + "scripts": [], + "own_tokenizer": false }, { "name": "Suri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mursi", "iso_1_code": null, "iso_3_code": "muz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8039", + "scripts": [], + "own_tokenizer": false }, { "name": "Suri, Tirmaga-Chai", "iso_1_code": null, "iso_3_code": "suq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8040", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8035", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8032", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Didinga-Murle", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Didinga-Longarim", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Didinga", "iso_1_code": null, "iso_3_code": "did", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8044", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Laarim", "iso_1_code": null, "iso_3_code": "loh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8045", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8043", + "scripts": [], + "own_tokenizer": false }, { "name": "Murle", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Murle", "iso_1_code": null, "iso_3_code": "mur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8047", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8046", + "scripts": [], + "own_tokenizer": false }, { "name": "Tennet", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tennet", "iso_1_code": null, "iso_3_code": "tex", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8049", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8048", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8042", + "scripts": [], + "own_tokenizer": false }, { "name": "Kacipo-Balesi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Suri, Kacipo-Bale", "iso_1_code": null, "iso_3_code": "koe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8051", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8050", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8041", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8031", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8027", + "scripts": [], + "own_tokenizer": false }, { "name": "Temein", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tese", "iso_1_code": null, "iso_3_code": "keg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8053", + "scripts": [], + "own_tokenizer": false }, { "name": "Temein", "iso_1_code": null, "iso_3_code": "teq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8054", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8052", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7910", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7876", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadugli-Krongo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kanga", "iso_1_code": null, "iso_3_code": "kcp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8056", + "scripts": [], + "own_tokenizer": false }, { "name": "Keiga", "iso_1_code": null, "iso_3_code": "kec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8057", + "scripts": [], + "own_tokenizer": false }, { "name": "Krongo", "iso_1_code": null, "iso_3_code": "kgo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8058", + "scripts": [], + "own_tokenizer": false }, { "name": "Tumtum", "iso_1_code": null, "iso_3_code": "tbr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8059", + "scripts": [], + "own_tokenizer": false }, { "name": "Tulishi", "iso_1_code": null, "iso_3_code": "tey", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8060", + "scripts": [], + "own_tokenizer": false }, { "name": "Katcha-Kadugli-Miri", "iso_1_code": null, "iso_3_code": "xtc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8061", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8055", + "scripts": [], + "own_tokenizer": false }, { "name": "Koman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gule", "iso_1_code": null, "iso_3_code": "gly", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8063", + "scripts": [], + "own_tokenizer": false }, { "name": "Gwama", "iso_1_code": null, "iso_3_code": "kmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8064", + "scripts": [], + "own_tokenizer": false }, { "name": "Opo", "iso_1_code": null, "iso_3_code": "lgn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8065", + "scripts": [], + "own_tokenizer": false }, { "name": "Uduk", "iso_1_code": null, "iso_3_code": "udu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8066", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Komo", "iso_1_code": null, "iso_3_code": "xom", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8067", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8062", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7872", + "scripts": [], + "own_tokenizer": false }, { "name": "Satellites", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Berta", "iso_1_code": null, "iso_3_code": "wti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8069", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Sudanic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lendu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ndrulo", "iso_1_code": null, "iso_3_code": "dno", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8073", + "scripts": [], + "own_tokenizer": false }, { "name": "Lendu", "iso_1_code": null, "iso_3_code": "led", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8074", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngiti", "iso_1_code": null, "iso_3_code": "niy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8075", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8072", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangbetu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asoa", "iso_1_code": null, "iso_3_code": "asv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8077", + "scripts": [], + "own_tokenizer": false }, { "name": "Lombi", "iso_1_code": null, "iso_3_code": "lmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8078", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangbetu", "iso_1_code": null, "iso_3_code": "mdj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8079", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8076", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangbutu-Efe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bendi", "iso_1_code": null, "iso_3_code": "bct", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8081", + "scripts": [], + "own_tokenizer": false }, { "name": "Efe", "iso_1_code": null, "iso_3_code": "efe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8082", + "scripts": [], + "own_tokenizer": false }, { "name": "Lese", "iso_1_code": null, "iso_3_code": "les", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8083", + "scripts": [], + "own_tokenizer": false }, { "name": "Mamvu", "iso_1_code": null, "iso_3_code": "mdi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8084", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangbutu", "iso_1_code": null, "iso_3_code": "mdk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8085", + "scripts": [], + "own_tokenizer": false }, { "name": "Mvuba", "iso_1_code": null, "iso_3_code": "mxh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8086", + "scripts": [], + "own_tokenizer": false }, { "name": "Kebu", "iso_1_code": null, "iso_3_code": "ndp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8087", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8080", + "scripts": [], + "own_tokenizer": false }, { "name": "Moru-Madi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Avokaya", "iso_1_code": null, "iso_3_code": "avu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8090", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Keliko", "iso_1_code": null, "iso_3_code": "kbo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8091", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lugbara", "iso_1_code": null, "iso_3_code": "lgg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8092", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Logo", "iso_1_code": null, "iso_3_code": "log", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8093", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Aringa", "iso_1_code": null, "iso_3_code": "luc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8094", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Omi", "iso_1_code": null, "iso_3_code": "omi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8095", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8089", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Moru", "iso_1_code": null, "iso_3_code": "mgd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8097", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8096", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Olu\u2019bo", "iso_1_code": null, "iso_3_code": "lul", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8099", + "scripts": [], + "own_tokenizer": false }, { "name": "Ma\u2019di", "iso_1_code": null, "iso_3_code": "mhi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8100", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ma\u2019di, Southern", "iso_1_code": null, "iso_3_code": "snm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8101", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8098", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8088", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8071", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bongo-Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bongo-Baka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": "blm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8105", + "scripts": [], + "own_tokenizer": false }, { "name": "Baka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baka", "iso_1_code": null, "iso_3_code": "bdh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8107", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8106", + "scripts": [], + "own_tokenizer": false }, { "name": "Bongo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bongo", "iso_1_code": null, "iso_3_code": "bot", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8109", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8108", + "scripts": [], + "own_tokenizer": false }, { "name": "M\u00f6d\u00f6-Nyamusa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jur Modo", "iso_1_code": null, "iso_3_code": "bex", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8111", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nyamusa-Molo", "iso_1_code": null, "iso_3_code": "nwm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8112", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8110", + "scripts": [], + "own_tokenizer": false }, { "name": "Morokodo-Mo\u2019da", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mo\u2019da", "iso_1_code": null, "iso_3_code": "gbn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8114", + "scripts": [], + "own_tokenizer": false }, { "name": "Morokodo", "iso_1_code": null, "iso_3_code": "mgc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8115", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8113", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mittu", "iso_1_code": null, "iso_3_code": "mwu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8117", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8116", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8104", + "scripts": [], + "own_tokenizer": false }, { "name": "Kara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fulu", "iso_1_code": null, "iso_3_code": "fuu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8119", + "scripts": [], + "own_tokenizer": false }, { "name": "Gula", "iso_1_code": null, "iso_3_code": "kcm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8120", + "scripts": [], + "own_tokenizer": false }, { "name": "Yulu", "iso_1_code": null, "iso_3_code": "yul", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8121", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8118", + "scripts": [], + "own_tokenizer": false }, { "name": "Sara-Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Birri", "iso_1_code": null, "iso_3_code": "bvq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8123", + "scripts": [], + "own_tokenizer": false }, { "name": "Fongoro", "iso_1_code": null, "iso_3_code": "fgr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8124", + "scripts": [], + "own_tokenizer": false }, { "name": "Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Morom", "iso_1_code": null, "iso_3_code": "bdo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8126", + "scripts": [], + "own_tokenizer": false }, { "name": "Bagirmi", "iso_1_code": null, "iso_3_code": "bmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8127", + "scripts": [], + "own_tokenizer": false }, { "name": "Berakou", "iso_1_code": null, "iso_3_code": "bxv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8128", + "scripts": [], + "own_tokenizer": false }, { "name": "Disa", "iso_1_code": null, "iso_3_code": "dsi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8129", + "scripts": [], + "own_tokenizer": false }, { "name": "Gula", "iso_1_code": null, "iso_3_code": "glu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8130", + "scripts": [], + "own_tokenizer": false }, { "name": "Jaya", "iso_1_code": null, "iso_3_code": "jyy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8131", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenga", "iso_1_code": null, "iso_3_code": "kyq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8132", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naba", "iso_1_code": null, "iso_3_code": "mne", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8133", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8125", + "scripts": [], + "own_tokenizer": false }, { "name": "Sara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sara Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bedjond", "iso_1_code": null, "iso_3_code": "bjv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8136", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dagba", "iso_1_code": null, "iso_3_code": "dgk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8137", + "scripts": [], + "own_tokenizer": false }, { "name": "Gor", "iso_1_code": null, "iso_3_code": "gqr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8138", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gulay", "iso_1_code": null, "iso_3_code": "gvl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8139", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Horo", "iso_1_code": null, "iso_3_code": "hor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8140", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabba", "iso_1_code": null, "iso_3_code": "ksp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8141", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Laka", "iso_1_code": null, "iso_3_code": "lap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8142", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mango", "iso_1_code": null, "iso_3_code": "mge", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8143", + "scripts": [], + "own_tokenizer": false }, { "name": "Sar", "iso_1_code": null, "iso_3_code": "mwm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8144", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mbay", "iso_1_code": null, "iso_3_code": "myb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8145", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ngam", "iso_1_code": null, "iso_3_code": "nmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8146", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngambay", "iso_1_code": null, "iso_3_code": "sba", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8147", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sara Kaba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaba D\u00e9m\u00e9, Sara", "iso_1_code": null, "iso_3_code": "kwg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8149", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaba Naa, Sara", "iso_1_code": null, "iso_3_code": "kwv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8150", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulfa", "iso_1_code": null, "iso_3_code": "kxj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8151", + "scripts": [], + "own_tokenizer": false }, { "name": "Sara Kaba", "iso_1_code": null, "iso_3_code": "sbz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8152", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8135", + "scripts": [], + "own_tokenizer": false }, { "name": "Vale", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Luto", "iso_1_code": null, "iso_3_code": "ndy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8154", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vale", "iso_1_code": null, "iso_3_code": "vae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8153", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8134", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8122", + "scripts": [], + "own_tokenizer": false }, { "name": "Sinyar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sinyar", "iso_1_code": null, "iso_3_code": "sys", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8157", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8156", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8103", + "scripts": [], + "own_tokenizer": false }, { "name": "Kresh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aja", "iso_1_code": null, "iso_3_code": "aja", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8159", + "scripts": [], + "own_tokenizer": false }, { "name": "Gbaya", "iso_1_code": null, "iso_3_code": "krs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8160", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8158", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8102", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8070", + "scripts": [], + "own_tokenizer": false }, { "name": "Fur", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amdang", "iso_1_code": null, "iso_3_code": "amj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8162", + "scripts": [], + "own_tokenizer": false }, { "name": "Fur", "iso_1_code": null, "iso_3_code": "fvr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8163", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8161", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kunama", "iso_1_code": null, "iso_3_code": "kun", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8165", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8164", + "scripts": [], + "own_tokenizer": false }, { "name": "Maban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karanga", "iso_1_code": null, "iso_3_code": "kth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8167", + "scripts": [], + "own_tokenizer": false }, { "name": "Kendeje", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kendeje", "iso_1_code": null, "iso_3_code": "klf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8168", + "scripts": [], + "own_tokenizer": false }, { "name": "Maba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Maba", "iso_1_code": null, "iso_3_code": "mde", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8171", + "scripts": [], + "own_tokenizer": false }, { "name": "Marfa", "iso_1_code": null, "iso_3_code": "mvu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8172", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8170", + "scripts": [], + "own_tokenizer": false }, { "name": "Masalit", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Massalat", "iso_1_code": null, "iso_3_code": "mdg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8174", + "scripts": [], + "own_tokenizer": false }, { "name": "Masalit", "iso_1_code": null, "iso_3_code": "mls", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8175", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8173", + "scripts": [], + "own_tokenizer": false }, { "name": "Runga-Kibet", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kibet", "iso_1_code": null, "iso_3_code": "kie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8177", + "scripts": [], + "own_tokenizer": false }, { "name": "Runga", "iso_1_code": null, "iso_3_code": "rou", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8178", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8176", + "scripts": [], + "own_tokenizer": false }, { "name": "Surbakhal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Surbakhal", "iso_1_code": null, "iso_3_code": "sbj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8180", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8179", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8166", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8068", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7871", + "scripts": [], + "own_tokenizer": false }, { "name": "Songhai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Korandje", "iso_1_code": null, "iso_3_code": "kcy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8182", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tadaksahak", "iso_1_code": null, "iso_3_code": "dsq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8184", + "scripts": [], + "own_tokenizer": false }, { "name": "Tasawaq", "iso_1_code": null, "iso_3_code": "twq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8185", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8183", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dendi", "iso_1_code": null, "iso_3_code": "ddn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8187", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zarma", "iso_1_code": null, "iso_3_code": "dje", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8188", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Songhay, Humburi Senni", "iso_1_code": null, "iso_3_code": "hmb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8189", + "scripts": [], + "own_tokenizer": false }, { "name": "Songhay, Koyra Chiini", "iso_1_code": null, "iso_3_code": "khq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8190", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Songhay, Koyraboro Senni", "iso_1_code": null, "iso_3_code": "ses", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8191", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tondi Songway Kiini", "iso_1_code": null, "iso_3_code": "tst", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8192", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8186", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8181", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "7849", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Nimboran.json b/data/Nimboran.json index d43f876f1eb05daaef15d7a5c41e4e61df843985..c8c7878f7a40b9940a66aaa81f2ef9eea8b5a33e 100644 --- a/data/Nimboran.json +++ b/data/Nimboran.json @@ -2,48 +2,60 @@ "name": "Nimboran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gresi", "iso_1_code": null, "iso_3_code": "grs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8194", + "scripts": [], + "own_tokenizer": false }, { "name": "Mlap", "iso_1_code": null, "iso_3_code": "kja", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8195", + "scripts": [], + "own_tokenizer": false }, { "name": "Kemtuik", "iso_1_code": null, "iso_3_code": "kmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8196", + "scripts": [], + "own_tokenizer": false }, { "name": "Mekwei", "iso_1_code": null, "iso_3_code": "msf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8197", + "scripts": [], + "own_tokenizer": false }, { "name": "Nimboran", "iso_1_code": null, "iso_3_code": "nir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8198", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8193", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/North Bougainville.json b/data/North Bougainville.json index 541252e6e7f47b59f1e3820245152b8f070be05d..b0a9c72aaf9c7b8e3a7f2bd39ddf058b92d3d8c9 100644 --- a/data/North Bougainville.json +++ b/data/North Bougainville.json @@ -2,67 +2,85 @@ "name": "North Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Keriaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ramopa", "iso_1_code": null, "iso_3_code": "kjx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8200", + "scripts": [], + "own_tokenizer": false }, { "name": "Konua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Rapoisi", "iso_1_code": null, "iso_3_code": "kyx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8203", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8202", + "scripts": [], + "own_tokenizer": false }, { "name": "Rotokas", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Askopan", "iso_1_code": null, "iso_3_code": "eiv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8205", + "scripts": [], + "own_tokenizer": false }, { "name": "Rotokas", "iso_1_code": null, "iso_3_code": "roo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8206", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8204", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8199", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Otomanguean.json b/data/Otomanguean.json index 8b3a151b4a485e28de6bd3891171cb717c5f36ed..a035097fb4bc46e921a8860782011b89535e4f0f 100644 --- a/data/Otomanguean.json +++ b/data/Otomanguean.json @@ -2,1674 +2,2262 @@ "name": "Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amuzgo-Mixtecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amuzgo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amuzgo, Guerrero", "iso_1_code": null, "iso_3_code": "amu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8211", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Amuzgo, San Pedro Amuzgos", "iso_1_code": null, "iso_3_code": "azg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8212", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Amuzgo, Ipalapa", "iso_1_code": null, "iso_3_code": "azm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8213", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8210", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cuicatec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cuicatec, Teutila", "iso_1_code": null, "iso_3_code": "cut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8216", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cuicatec, Tepeuxila", "iso_1_code": null, "iso_3_code": "cux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8217", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8215", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mixtec, Western Juxtlahuaca", "iso_1_code": null, "iso_3_code": "jmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8219", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Yutanduchi", "iso_1_code": null, "iso_3_code": "mab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8220", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Amoltepec", "iso_1_code": null, "iso_3_code": "mbz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8221", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Itundujia", "iso_1_code": null, "iso_3_code": "mce", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8222", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Santa Luc\u00eda Monteverde", "iso_1_code": null, "iso_3_code": "mdv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8223", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Southwestern Tlaxiaco", "iso_1_code": null, "iso_3_code": "meh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8224", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Atatlahuca", "iso_1_code": null, "iso_3_code": "mib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8225", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Ocotepec", "iso_1_code": null, "iso_3_code": "mie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8226", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, San Miguel el Grande", "iso_1_code": null, "iso_3_code": "mig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8227", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Chayuco", "iso_1_code": null, "iso_3_code": "mih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8228", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Chigmecatitl\u00e1n", "iso_1_code": null, "iso_3_code": "mii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8229", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Pe\u00f1oles", "iso_1_code": null, "iso_3_code": "mil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8230", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Alacatlatzala", "iso_1_code": null, "iso_3_code": "mim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8231", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Pinotepa Nacional", "iso_1_code": null, "iso_3_code": "mio", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8232", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Apasco-Apoala", "iso_1_code": null, "iso_3_code": "mip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8233", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Southern Puebla", "iso_1_code": null, "iso_3_code": "mit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8234", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Cacaloxtepec", "iso_1_code": null, "iso_3_code": "miu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8235", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Mixtepec", "iso_1_code": null, "iso_3_code": "mix", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8236", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Ayutla", "iso_1_code": null, "iso_3_code": "miy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8237", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Coatzospan", "iso_1_code": null, "iso_3_code": "miz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8238", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, San Juan Colorado", "iso_1_code": null, "iso_3_code": "mjc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8239", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Silacayoapan", "iso_1_code": null, "iso_3_code": "mks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8240", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Yosond\u00faa", "iso_1_code": null, "iso_3_code": "mpm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8241", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Tlazoyaltepec", "iso_1_code": null, "iso_3_code": "mqh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8242", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tututepec", "iso_1_code": null, "iso_3_code": "mtu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8243", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tida\u00e1", "iso_1_code": null, "iso_3_code": "mtx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8244", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Yucua\u00f1e", "iso_1_code": null, "iso_3_code": "mvg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8245", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Northwest Oaxaca", "iso_1_code": null, "iso_3_code": "mxa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8246", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tezoatl\u00e1n", "iso_1_code": null, "iso_3_code": "mxb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8247", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Huitepec", "iso_1_code": null, "iso_3_code": "mxs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8248", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Jamiltepec", "iso_1_code": null, "iso_3_code": "mxt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8249", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Metlat\u00f3noc", "iso_1_code": null, "iso_3_code": "mxv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8250", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Southeastern Nochixtl\u00e1n", "iso_1_code": null, "iso_3_code": "mxy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8251", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Santa Mar\u00eda Zacatepec", "iso_1_code": null, "iso_3_code": "mza", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8252", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Juxtlahuaca", "iso_1_code": null, "iso_3_code": "vmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8253", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Ixtayutla", "iso_1_code": null, "iso_3_code": "vmj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8254", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Mitlatongo", "iso_1_code": null, "iso_3_code": "vmm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8255", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Soyaltepec", "iso_1_code": null, "iso_3_code": "vmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8256", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tamazola", "iso_1_code": null, "iso_3_code": "vmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8257", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Alcozauca", "iso_1_code": null, "iso_3_code": "xta", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8258", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Chazumba", "iso_1_code": null, "iso_3_code": "xtb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8259", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Diuxi-Tilantongo", "iso_1_code": null, "iso_3_code": "xtd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8260", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Sinicahua", "iso_1_code": null, "iso_3_code": "xti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8261", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, San Juan Teita", "iso_1_code": null, "iso_3_code": "xtj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8262", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tijaltepec", "iso_1_code": null, "iso_3_code": "xtl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8263", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Magdalena Pe\u00f1asco", "iso_1_code": null, "iso_3_code": "xtm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8264", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, Northern Tlaxiaco", "iso_1_code": null, "iso_3_code": "xtn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8265", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mixtec, San Miguel Piedras", "iso_1_code": null, "iso_3_code": "xtp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8266", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Sindihui", "iso_1_code": null, "iso_3_code": "xts", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8267", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Tacahua", "iso_1_code": null, "iso_3_code": "xtt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8268", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Cuyamecalco", "iso_1_code": null, "iso_3_code": "xtu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8269", + "scripts": [], + "own_tokenizer": false }, { "name": "Mixtec, Yolox\u00f3chitl", "iso_1_code": null, "iso_3_code": "xty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8270", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8218", + "scripts": [], + "own_tokenizer": false }, { "name": "Trique", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Triqui, Copala", "iso_1_code": null, "iso_3_code": "trc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8272", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Triqui, San Mart\u00edn Itunyoso", "iso_1_code": null, "iso_3_code": "trq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8273", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Triqui, Chicahuaxtla", "iso_1_code": null, "iso_3_code": "trs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8274", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8271", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8209", + "scripts": [], + "own_tokenizer": false }, { "name": "Popolocan-Zapotecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Popolocan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chocholtec", "iso_1_code": null, "iso_3_code": "coz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8277", + "scripts": [], + "own_tokenizer": false }, { "name": "Ixcatec", "iso_1_code": null, "iso_3_code": "ixc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8278", + "scripts": [], + "own_tokenizer": false }, { "name": "Mazatec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mazatec, San Jer\u00f3nimo Tec\u00f3atl", "iso_1_code": null, "iso_3_code": "maa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8280", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazatec, Jalapa de D\u00edaz", "iso_1_code": null, "iso_3_code": "maj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8281", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazatec, Chiquihuitl\u00e1n", "iso_1_code": null, "iso_3_code": "maq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8282", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazatec, Huautla", "iso_1_code": null, "iso_3_code": "mau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8283", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazatec, Ixcatl\u00e1n", "iso_1_code": null, "iso_3_code": "mzi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8284", + "scripts": [], + "own_tokenizer": false }, { "name": "Mazatec, Puebla and Northeastern", "iso_1_code": null, "iso_3_code": "pbm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8285", + "scripts": [], + "own_tokenizer": false }, { "name": "Mazatec, Soyaltepec", "iso_1_code": null, "iso_3_code": "vmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8286", + "scripts": [], + "own_tokenizer": false }, { "name": "Mazatec, Ayautla", "iso_1_code": null, "iso_3_code": "vmy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8287", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazatec, Mazatl\u00e1n", "iso_1_code": null, "iso_3_code": "vmz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8288", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8279", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoloca", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Popoloca, Mezontla", "iso_1_code": null, "iso_3_code": "pbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8290", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoloca, Coyotepec", "iso_1_code": null, "iso_3_code": "pbf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8291", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoloca, Santa In\u00e9s Ahuatempan", "iso_1_code": null, "iso_3_code": "pca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8292", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoloca, San Marcos Tlacoyalco", "iso_1_code": null, "iso_3_code": "pls", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8293", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Popoloca, San Juan Atzingo", "iso_1_code": null, "iso_3_code": "poe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8294", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Popoloca, San Felipe Otlaltepec", "iso_1_code": null, "iso_3_code": "pow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8295", + "scripts": [], + "own_tokenizer": false }, { "name": "Popoloca, San Lu\u00eds Temalacayuca", "iso_1_code": null, "iso_3_code": "pps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8296", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8289", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8276", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chatino", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chatino, Eastern Highland", "iso_1_code": null, "iso_3_code": "cly", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8299", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chatino, Tataltepec", "iso_1_code": null, "iso_3_code": "cta", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8300", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chatino, Western Highland", "iso_1_code": null, "iso_3_code": "ctp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8301", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chatino, Zacatepec", "iso_1_code": null, "iso_3_code": "ctz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8302", + "scripts": [], + "own_tokenizer": false }, { "name": "Chatino, Nopala", "iso_1_code": null, "iso_3_code": "cya", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8303", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chatino, Zenzontepec", "iso_1_code": null, "iso_3_code": "czn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8304", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8298", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Zapotec, Sierra de Ju\u00e1rez", "iso_1_code": null, "iso_3_code": "zaa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8306", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Western Tlacolula Valley", "iso_1_code": null, "iso_3_code": "zab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8307", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Ocotl\u00e1n", "iso_1_code": null, "iso_3_code": "zac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8308", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Cajonos", "iso_1_code": null, "iso_3_code": "zad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8309", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Yareni", "iso_1_code": null, "iso_3_code": "zae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8310", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Ayoquesco", "iso_1_code": null, "iso_3_code": "zaf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8311", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Isthmus", "iso_1_code": null, "iso_3_code": "zai", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8312", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Miahuatl\u00e1n", "iso_1_code": null, "iso_3_code": "zam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8313", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Ozolotepec", "iso_1_code": null, "iso_3_code": "zao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8314", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Alo\u00e1pam", "iso_1_code": null, "iso_3_code": "zaq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8315", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Rinc\u00f3n", "iso_1_code": null, "iso_3_code": "zar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8316", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Santo Domingo Albarradas", "iso_1_code": null, "iso_3_code": "zas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8317", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Tabaa", "iso_1_code": null, "iso_3_code": "zat", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8318", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Yatzachi", "iso_1_code": null, "iso_3_code": "zav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8319", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Mitla", "iso_1_code": null, "iso_3_code": "zaw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8320", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Xadani", "iso_1_code": null, "iso_3_code": "zax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8321", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Coatecas Altas", "iso_1_code": null, "iso_3_code": "zca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8322", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Las Delicias", "iso_1_code": null, "iso_3_code": "zcd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8323", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Asunci\u00f3n Mixtepec", "iso_1_code": null, "iso_3_code": "zoo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8324", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Lachiguiri", "iso_1_code": null, "iso_3_code": "zpa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8325", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Yautepec", "iso_1_code": null, "iso_3_code": "zpb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8326", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Choapan", "iso_1_code": null, "iso_3_code": "zpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8327", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Southeastern Ixtl\u00e1n", "iso_1_code": null, "iso_3_code": "zpd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8328", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Petapa", "iso_1_code": null, "iso_3_code": "zpe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8329", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, San Pedro Quiatoni", "iso_1_code": null, "iso_3_code": "zpf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8330", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Guevea de Humboldt", "iso_1_code": null, "iso_3_code": "zpg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8331", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Totomachapan", "iso_1_code": null, "iso_3_code": "zph", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8332", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Santa Mar\u00eda Quiegolani", "iso_1_code": null, "iso_3_code": "zpi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8333", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Quiavicuzas", "iso_1_code": null, "iso_3_code": "zpj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8334", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Tlacolulita", "iso_1_code": null, "iso_3_code": "zpk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8335", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Lachix\u00edo", "iso_1_code": null, "iso_3_code": "zpl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8336", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Mixtepec", "iso_1_code": null, "iso_3_code": "zpm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8337", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Santa In\u00e9s Yatzechi", "iso_1_code": null, "iso_3_code": "zpn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8338", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Amatl\u00e1n", "iso_1_code": null, "iso_3_code": "zpo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8339", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, El Alto", "iso_1_code": null, "iso_3_code": "zpp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8340", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Zoogocho", "iso_1_code": null, "iso_3_code": "zpq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8341", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Santiago Xanica", "iso_1_code": null, "iso_3_code": "zpr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8342", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "zps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8343", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, San Vicente Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "zpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8344", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Yal\u00e1lag", "iso_1_code": null, "iso_3_code": "zpu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8345", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Chichicapan", "iso_1_code": null, "iso_3_code": "zpv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8346", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Zaniza", "iso_1_code": null, "iso_3_code": "zpw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8347", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, San Baltazar Loxicha", "iso_1_code": null, "iso_3_code": "zpx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8348", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Mazaltepec", "iso_1_code": null, "iso_3_code": "zpy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8349", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Texmelucan", "iso_1_code": null, "iso_3_code": "zpz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8350", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Southern Rincon", "iso_1_code": null, "iso_3_code": "zsr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8351", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Elotepec", "iso_1_code": null, "iso_3_code": "zte", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8352", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Xanagu\u00eda", "iso_1_code": null, "iso_3_code": "ztg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8353", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Lapagu\u00eda-Guivini", "iso_1_code": null, "iso_3_code": "ztl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8354", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, San Agust\u00edn Mixtepec", "iso_1_code": null, "iso_3_code": "ztm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8355", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Santa Catarina Albarradas", "iso_1_code": null, "iso_3_code": "ztn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8356", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Loxicha", "iso_1_code": null, "iso_3_code": "ztp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8357", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Quioquitani-Quier\u00ed", "iso_1_code": null, "iso_3_code": "ztq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8358", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zapotec, Tilquiapan", "iso_1_code": null, "iso_3_code": "zts", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8359", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Tejalapan", "iso_1_code": null, "iso_3_code": "ztt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8360", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, G\u00fcil\u00e1", "iso_1_code": null, "iso_3_code": "ztu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8361", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Zaachila", "iso_1_code": null, "iso_3_code": "ztx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8362", + "scripts": [], + "own_tokenizer": false }, { "name": "Zapotec, Yatee", "iso_1_code": null, "iso_3_code": "zty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8363", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8305", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8297", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8275", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8208", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Oto-Pame-Chinantecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinantecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinantec, Comaltepec", "iso_1_code": null, "iso_3_code": "cco", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8367", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Ojitl\u00e1n", "iso_1_code": null, "iso_3_code": "chj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8368", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Quiotepec", "iso_1_code": null, "iso_3_code": "chq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8369", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Ozumac\u00edn", "iso_1_code": null, "iso_3_code": "chz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8370", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Lealao", "iso_1_code": null, "iso_3_code": "cle", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8371", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Lalana", "iso_1_code": null, "iso_3_code": "cnl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8372", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Tepetotutla", "iso_1_code": null, "iso_3_code": "cnt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8373", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Palantla", "iso_1_code": null, "iso_3_code": "cpa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8374", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Chiltepec", "iso_1_code": null, "iso_3_code": "csa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8375", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinantec, Sochiapam", "iso_1_code": null, "iso_3_code": "cso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8376", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Tepinapa", "iso_1_code": null, "iso_3_code": "cte", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8377", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinantec, Tlacoatzintepec", "iso_1_code": null, "iso_3_code": "ctl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8378", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinantec, Usila", "iso_1_code": null, "iso_3_code": "cuc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8379", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chinantec, Valle Nacional", "iso_1_code": null, "iso_3_code": "cvn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8380", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8366", + "scripts": [], + "own_tokenizer": false }, { "name": "Oto-Pamean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chichimeco-Jonaz", "iso_1_code": null, "iso_3_code": "pei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8382", + "scripts": [], + "own_tokenizer": false }, { "name": "Matlatzinca-Ocuilteco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Matlatzinca, San Francisco", "iso_1_code": null, "iso_3_code": "mat", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8384", + "scripts": [], + "own_tokenizer": false }, { "name": "Matlatzinca, Atzingo", "iso_1_code": null, "iso_3_code": "ocu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8385", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8383", + "scripts": [], + "own_tokenizer": false }, { "name": "Mazahua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mazahua, Central", "iso_1_code": null, "iso_3_code": "maz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8387", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mazahua, Michoac\u00e1n", "iso_1_code": null, "iso_3_code": "mmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8386", + "scripts": [], + "own_tokenizer": false }, { "name": "Otomi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Otomi, Mezquital", "iso_1_code": null, "iso_3_code": "ote", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8390", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otomi, Tilapa", "iso_1_code": null, "iso_3_code": "otl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8391", + "scripts": [], + "own_tokenizer": false }, { "name": "Otomi, Eastern Highland", "iso_1_code": null, "iso_3_code": "otm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8392", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otomi, Tenango", "iso_1_code": null, "iso_3_code": "otn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8393", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otomi, Quer\u00e9taro", "iso_1_code": null, "iso_3_code": "otq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8394", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otom\u00ed, Estado de M\u00e9xico", "iso_1_code": null, "iso_3_code": "ots", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8395", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Otomi, Temoaya", "iso_1_code": null, "iso_3_code": "ott", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8396", + "scripts": [], + "own_tokenizer": false }, { "name": "Otomi, Texcatepec", "iso_1_code": null, "iso_3_code": "otx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8397", + "scripts": [], + "own_tokenizer": false }, { "name": "Otomi, Ixtenco", "iso_1_code": null, "iso_3_code": "otz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8398", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8389", + "scripts": [], + "own_tokenizer": false }, { "name": "Pame", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pame, Central", "iso_1_code": null, "iso_3_code": "pbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8400", + "scripts": [], + "own_tokenizer": false }, { "name": "Pame, Northern", "iso_1_code": null, "iso_3_code": "pmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8401", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pame, Southern", "iso_1_code": null, "iso_3_code": "pmz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8402", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8399", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8381", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8365", + "scripts": [], + "own_tokenizer": false }, { "name": "Tlapanec-Manguean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Manguean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chiapanec", "iso_1_code": null, "iso_3_code": "cip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8405", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8404", + "scripts": [], + "own_tokenizer": false }, { "name": "Tlapanec-Subtiaba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Subtiaba", "iso_1_code": null, "iso_3_code": "sut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8407", + "scripts": [], + "own_tokenizer": false }, { "name": "Tlapanec", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Me\u2019phaa, Malinaltepec", "iso_1_code": null, "iso_3_code": "tcf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8409", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Me\u2019phaa, Azoy\u00fa", "iso_1_code": null, "iso_3_code": "tpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8410", + "scripts": [], + "own_tokenizer": false }, { "name": "Me\u2019phaa, Tlacoapa", "iso_1_code": null, "iso_3_code": "tpl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8411", + "scripts": [], + "own_tokenizer": false }, { "name": "Me\u2019phaa, Acatepec", "iso_1_code": null, "iso_3_code": "tpx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8412", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8408", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8406", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8403", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8207", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Paezan.json b/data/Paezan.json index 93617992a459667927fed909b525f90df9683f56..6cb1b0f34234ffc35ca6c16b47bce5b93c3bd1be 100644 --- a/data/Paezan.json +++ b/data/Paezan.json @@ -2,74 +2,96 @@ "name": "Paezan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andaqui", "iso_1_code": null, "iso_3_code": "ana", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8414", + "scripts": [], + "own_tokenizer": false }, { "name": "Coconuco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anserma", "iso_1_code": null, "iso_3_code": "ans", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8416", + "scripts": [], + "own_tokenizer": false }, { "name": "Caramanta", "iso_1_code": null, "iso_3_code": "crf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8417", + "scripts": [], + "own_tokenizer": false }, { "name": "Misak", "iso_1_code": null, "iso_3_code": "gum", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8418", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totoro", "iso_1_code": null, "iso_3_code": "ttk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8415", + "scripts": [], + "own_tokenizer": false }, { "name": "Paezan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nasa", "iso_1_code": null, "iso_3_code": "pbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8421", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8420", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8413", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Palaihnihan.json b/data/Palaihnihan.json index e7ca8d1aacf3ddd17f32719ea088a3a2c8a5fa26..dc2a12360cf7dcaa1a5bfebd256fe58a763a7a16 100644 --- a/data/Palaihnihan.json +++ b/data/Palaihnihan.json @@ -2,24 +2,30 @@ "name": "Palaihnihan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Achumawi", "iso_1_code": null, "iso_3_code": "acv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8423", + "scripts": [], + "own_tokenizer": false }, { "name": "Atsugewi", "iso_1_code": null, "iso_3_code": "atw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8424", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8422", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Panoan.json b/data/Panoan.json index f689eb1b4c1d9865452617792268b2d0bbdd205e..db4e645c244b73cd085844c8de9d2e449af6a876 100644 --- a/data/Panoan.json +++ b/data/Panoan.json @@ -2,314 +2,406 @@ "name": "Panoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaxarar\u00ed", "iso_1_code": null, "iso_3_code": "ktx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8426", + "scripts": [], + "own_tokenizer": false }, { "name": "Pisabo", "iso_1_code": null, "iso_3_code": "pig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8427", + "scripts": [], + "own_tokenizer": false }, { "name": "Sensi", "iso_1_code": null, "iso_3_code": "sni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8428", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulina Pano", "iso_1_code": null, "iso_3_code": "xpk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8429", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolivian Panoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pacahuara", "iso_1_code": null, "iso_3_code": "pcp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8431", + "scripts": [], + "own_tokenizer": false }, { "name": "Ch\u00e1kobo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ch\u00e1cobo", "iso_1_code": null, "iso_3_code": "cao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8433", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8430", + "scripts": [], + "own_tokenizer": false }, { "name": "Mainline", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Poyan\u00e1wa", "iso_1_code": null, "iso_3_code": "pyn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8435", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuxin\u00e1wa", "iso_1_code": null, "iso_3_code": "tux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8436", + "scripts": [], + "own_tokenizer": false }, { "name": "Cashibo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kakataibo-Kashibo", "iso_1_code": null, "iso_3_code": "cbr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8438", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8437", + "scripts": [], + "own_tokenizer": false }, { "name": "Pano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Panobo", "iso_1_code": null, "iso_3_code": "pno", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8439", + "scripts": [], + "own_tokenizer": false }, { "name": "Shipibo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Capanahua", "iso_1_code": null, "iso_3_code": "kaq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8442", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Katuk\u00edna, Panoan", "iso_1_code": null, "iso_3_code": "knt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8443", + "scripts": [], + "own_tokenizer": false }, { "name": "Marubo", "iso_1_code": null, "iso_3_code": "mzr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8444", + "scripts": [], + "own_tokenizer": false }, { "name": "Remo", "iso_1_code": null, "iso_3_code": "rem", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8445", + "scripts": [], + "own_tokenizer": false }, { "name": "Shipibo-Conibo", "iso_1_code": null, "iso_3_code": "shp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8446", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8441", + "scripts": [], + "own_tokenizer": false }, { "name": "Tri-State", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kashinawa", "iso_1_code": null, "iso_3_code": "cbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8448", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sharanahua", "iso_1_code": null, "iso_3_code": "mcd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8449", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaminahua", "iso_1_code": null, "iso_3_code": "yaa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8450", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yawanawa", "iso_1_code": null, "iso_3_code": "ywn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8451", + "scripts": [], + "own_tokenizer": false }, { "name": "Amawaka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amahuaca", "iso_1_code": null, "iso_3_code": "amc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8453", + "scripts": [], + "own_tokenizer": false }, { "name": "Isconahua", "iso_1_code": null, "iso_3_code": "isc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8454", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8452", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8447", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Atsahuaca", "iso_1_code": null, "iso_3_code": "atc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8456", + "scripts": [], + "own_tokenizer": false }, { "name": "Yora", "iso_1_code": null, "iso_3_code": "mts", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8457", + "scripts": [], + "own_tokenizer": false }, { "name": "Nukuini", "iso_1_code": null, "iso_3_code": "nuc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8458", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8455", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8434", + "scripts": [], + "own_tokenizer": false }, { "name": "Mayoruna-Mats\u00e9s", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Matses", "iso_1_code": null, "iso_3_code": "mcf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8460", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mat\u00eds", "iso_1_code": null, "iso_3_code": "mpq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8461", + "scripts": [], + "own_tokenizer": false }, { "name": "Korubo", "iso_1_code": null, "iso_3_code": "xor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8462", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8459", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8425", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Pauwasi.json b/data/Pauwasi.json index c517748463aa9853352ecd4f1247613b9d5d0125..400a507e298d54258d407db30a88dfffd99aa299 100644 --- a/data/Pauwasi.json +++ b/data/Pauwasi.json @@ -2,66 +2,84 @@ "name": "Pauwasi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Emem", "iso_1_code": null, "iso_3_code": "enr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8465", + "scripts": [], + "own_tokenizer": false }, { "name": "Zorop", "iso_1_code": null, "iso_3_code": "wfg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8466", + "scripts": [], + "own_tokenizer": false }, { "name": "Karkar-Yuri", "iso_1_code": null, "iso_3_code": "yuj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8467", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8464", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tebi", "iso_1_code": null, "iso_3_code": "dmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8469", + "scripts": [], + "own_tokenizer": false }, { "name": "Towei", "iso_1_code": null, "iso_3_code": "ttn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8470", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8468", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8463", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Piawi.json b/data/Piawi.json index 2bbe5fc63831a87c9bc2c9a077ed59f15100198a..8283e796b21e518ebbe8a106d12d0006915d9f82 100644 --- a/data/Piawi.json +++ b/data/Piawi.json @@ -2,24 +2,32 @@ "name": "Piawi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pinai-Hagahai", "iso_1_code": null, "iso_3_code": "pnn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8472", + "scripts": [], + "own_tokenizer": false }, { "name": "Haruai", "iso_1_code": null, "iso_3_code": "tmd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8473", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8471", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Pidgin.json b/data/Pidgin.json index 07a249188a5357e96077e7c64724300cb22e8b57..f262ab36e9d6fc243029cac3df895140bd70cdfd 100644 --- a/data/Pidgin.json +++ b/data/Pidgin.json @@ -2,270 +2,336 @@ "name": "Pidgin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mobilian", "iso_1_code": null, "iso_3_code": "mod", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8475", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndyuka-Trio Pidgin", "iso_1_code": null, "iso_3_code": "njt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8476", + "scripts": [], + "own_tokenizer": false }, { "name": "Amerindian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinook Wawa", "iso_1_code": null, "iso_3_code": "chn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8478", + "scripts": [], + "own_tokenizer": false }, { "name": "Delaware, Pidgin", "iso_1_code": null, "iso_3_code": "dep", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8477", + "scripts": [], + "own_tokenizer": false }, { "name": "Assamese based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nefamese", "iso_1_code": null, "iso_3_code": "nef", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8481", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8480", + "scripts": [], + "own_tokenizer": false }, { "name": "English based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Liberian English", "iso_1_code": null, "iso_3_code": "lir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8484", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8483", + "scripts": [], + "own_tokenizer": false }, { "name": "Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinese Pidgin English", "iso_1_code": null, "iso_3_code": "cpi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8486", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8485", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8482", + "scripts": [], + "own_tokenizer": false }, { "name": "French based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tay Boi", "iso_1_code": null, "iso_3_code": "tas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8488", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8487", + "scripts": [], + "own_tokenizer": false }, { "name": "Hausa based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barikanchi", "iso_1_code": null, "iso_3_code": "bxo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8490", + "scripts": [], + "own_tokenizer": false }, { "name": "Gibanawa", "iso_1_code": null, "iso_3_code": "gib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8491", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8489", + "scripts": [], + "own_tokenizer": false }, { "name": "Iha based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Iha Based Pidgin", "iso_1_code": null, "iso_3_code": "ihb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8492", + "scripts": [], + "own_tokenizer": false }, { "name": "Malay based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Broome Pearling Lugger Pidgin", "iso_1_code": null, "iso_3_code": "bpl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8495", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8494", + "scripts": [], + "own_tokenizer": false }, { "name": "Motu based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Motu, Hiri", "iso_1_code": "ho", "iso_3_code": "hmo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8497", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8496", + "scripts": [], + "own_tokenizer": false }, { "name": "Onin based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Onin Based Pidgin", "iso_1_code": null, "iso_3_code": "onx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8499", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8498", + "scripts": [], + "own_tokenizer": false }, { "name": "Romance based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lingua Franca", "iso_1_code": null, "iso_3_code": "pml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8501", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8500", + "scripts": [], + "own_tokenizer": false }, { "name": "Swahili based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Settla", "iso_1_code": null, "iso_3_code": "sta", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8502", + "scripts": [], + "own_tokenizer": false }, { "name": "Zulu based", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pidgin Bantu", "iso_1_code": null, "iso_3_code": "fng", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8505", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8504", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8474", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Pomoan.json b/data/Pomoan.json index 7ff407d77fae7472ce9f59bf6ced49ab4a99fbc1..25a896edb9015db8423569f05072ad9e394888a8 100644 --- a/data/Pomoan.json +++ b/data/Pomoan.json @@ -2,82 +2,102 @@ "name": "Pomoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pomo, Eastern", "iso_1_code": null, "iso_3_code": "peb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8507", + "scripts": [], + "own_tokenizer": false }, { "name": "Pomo, Northeastern", "iso_1_code": null, "iso_3_code": "pef", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8508", + "scripts": [], + "own_tokenizer": false }, { "name": "Pomo, Southeastern", "iso_1_code": null, "iso_3_code": "pom", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8509", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pomo, Northern", "iso_1_code": null, "iso_3_code": "pej", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8511", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kashaya", "iso_1_code": null, "iso_3_code": "kju", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8513", + "scripts": [], + "own_tokenizer": false }, { "name": "Pomo, Southern", "iso_1_code": null, "iso_3_code": "peq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8514", + "scripts": [], + "own_tokenizer": false }, { "name": "Pomo, Central", "iso_1_code": null, "iso_3_code": "poo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8515", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8512", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8510", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8506", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Puinavean.json b/data/Puinavean.json index 400c6ddf41a786a69e4d182590b947cfd9af5fa1..c1e91d56773a68c2112475fb2ee68dfe5d6fda31 100644 --- a/data/Puinavean.json +++ b/data/Puinavean.json @@ -2,91 +2,119 @@ "name": "Puinavean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "D\u00e2w", "iso_1_code": null, "iso_3_code": "kwa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8517", + "scripts": [], + "own_tokenizer": false }, { "name": "Puinave", "iso_1_code": null, "iso_3_code": "pui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8518", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cacua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cacua", "iso_1_code": null, "iso_3_code": "cbv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8520", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nukak Mak\u00fa", "iso_1_code": null, "iso_3_code": "mbr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8519", + "scripts": [], + "own_tokenizer": false }, { "name": "Hupda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hupd\u00eb", "iso_1_code": null, "iso_3_code": "jup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8523", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuhup", "iso_1_code": null, "iso_3_code": "yab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8524", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8522", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaburi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nad\u00ebb", "iso_1_code": null, "iso_3_code": "mbj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8526", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8525", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8516", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Purian.json b/data/Purian.json index 10e9c373848bd5bb0426515a39f84b83371b1099..fa58b6c26b53db8fd6f41ed57fc8e6bb0a06ed9e 100644 --- a/data/Purian.json +++ b/data/Purian.json @@ -2,24 +2,30 @@ "name": "Purian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Puri", "iso_1_code": null, "iso_3_code": "prr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8528", + "scripts": [], + "own_tokenizer": false }, { "name": "Korop\u00f3", "iso_1_code": null, "iso_3_code": "xxr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8529", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8527", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Quechuan.json b/data/Quechuan.json index 9b0461467483d32f6858794b8e2dc8ecab8c20be..160515e93334d95dbe605d1c98cf97be96e3cb8b 100644 --- a/data/Quechuan.json +++ b/data/Quechuan.json @@ -2,486 +2,660 @@ "name": "Quechuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Pacaraos", "iso_1_code": "qu", "iso_3_code": "qvp", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8532", + "scripts": [], + "own_tokenizer": false }, { "name": "Ap-am-ah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Huallaga", "iso_1_code": "qu", "iso_3_code": "qub", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8534", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Ambo-Pasco", "iso_1_code": "qu", "iso_3_code": "qva", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8535", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Panao", "iso_1_code": "qu", "iso_3_code": "qxh", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8536", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Alto Mara\u00f1\u00f3n", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Huamal\u00edes-Dos de Mayo", "iso_1_code": "qu", "iso_3_code": "qvh", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8538", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Margos-Yarowilca-Lauricocha", "iso_1_code": "qu", "iso_3_code": "qvm", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8539", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8537", + "scripts": [], + "own_tokenizer": false }, { "name": "Alto Pativilca", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Cajatambo North Lima", "iso_1_code": "qu", "iso_3_code": "qvl", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8541", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Chiqui\u00e1n", "iso_1_code": "qu", "iso_3_code": "qxa", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8542", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8540", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8533", + "scripts": [], + "own_tokenizer": false }, { "name": "Wankay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Chaupihuaranga", "iso_1_code": "qu", "iso_3_code": "qur", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8544", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Yauyos", "iso_1_code": "qu", "iso_3_code": "qux", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8545", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, North Jun\u00edn", "iso_1_code": "qu", "iso_3_code": "qvn", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8546", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Huaylla Wanca", "iso_1_code": "qu", "iso_3_code": "qvw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8547", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Chincha", "iso_1_code": "qu", "iso_3_code": "qxc", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8548", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Santa Ana de Tusi Pasco", "iso_1_code": "qu", "iso_3_code": "qxt", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8549", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Jauja Wanca", "iso_1_code": "qu", "iso_3_code": "qxw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8550", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8543", + "scripts": [], + "own_tokenizer": false }, { "name": "Waylay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Corongo Ancash", "iso_1_code": "qu", "iso_3_code": "qwa", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8552", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Huaylas Ancash", "iso_1_code": "qu", "iso_3_code": "qwh", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8553", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Sihuas Ancash", "iso_1_code": "qu", "iso_3_code": "qws", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8554", + "scripts": [], + "own_tokenizer": false }, { "name": "Conchucos", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Northern Conchucos", "iso_1_code": "qu", "iso_3_code": "qxn", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8556", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Southern Conchucos", "iso_1_code": "qu", "iso_3_code": "qxo", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8557", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8555", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8551", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8531", + "scripts": [], + "own_tokenizer": false }, { "name": "Peripheral Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Inga", "iso_1_code": null, "iso_3_code": "inb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8561", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Inga, Jungle", "iso_1_code": null, "iso_3_code": "inj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8562", + "scripts": [], + "own_tokenizer": false }, { "name": "Quichua, Calder\u00f3n Highland", "iso_1_code": "qu", "iso_3_code": "qud", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8563", + "scripts": [], + "own_tokenizer": false }, { "name": "Quichua, Chimborazo Highland", "iso_1_code": "qu", "iso_3_code": "qug", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8564", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Chachapoyas", "iso_1_code": "qu", "iso_3_code": "quk", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8565", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Southern Pastaza", "iso_1_code": "qu", "iso_3_code": "qup", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8566", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Tena Lowland", "iso_1_code": "qu", "iso_3_code": "quw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8567", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Imbabura Highland", "iso_1_code": "qu", "iso_3_code": "qvi", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8568", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Loja Highland", "iso_1_code": "qu", "iso_3_code": "qvj", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8569", + "scripts": [], + "own_tokenizer": false }, { "name": "Quichua, Napo", "iso_1_code": "qu", "iso_3_code": "qvo", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8570", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, San Mart\u00edn", "iso_1_code": "qu", "iso_3_code": "qvs", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8571", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Northern Pastaza", "iso_1_code": "qu", "iso_3_code": "qvz", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8572", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Salasaca Highland", "iso_1_code": "qu", "iso_3_code": "qxl", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8573", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Ca\u00f1ar Highland", "iso_1_code": "qu", "iso_3_code": "qxr", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8574", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8560", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, South Bolivian", "iso_1_code": "qu", "iso_3_code": "quh", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8576", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, North Bolivian", "iso_1_code": "qu", "iso_3_code": "qul", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8577", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quichua, Santiago del Estero", "iso_1_code": "qu", "iso_3_code": "qus", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8578", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Southern Peruvian Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Ayacucho", "iso_1_code": "qu", "iso_3_code": "quy", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8580", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Cusco", "iso_1_code": "qu", "iso_3_code": "quz", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8581", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Eastern Apur\u00edmac", "iso_1_code": "qu", "iso_3_code": "qve", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8582", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Puno", "iso_1_code": "qu", "iso_3_code": "qxp", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8583", + "scripts": [], + "own_tokenizer": false }, { "name": "Quechua, Arequipa-La Uni\u00f3n", "iso_1_code": "qu", "iso_3_code": "qxu", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8584", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8579", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8575", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8559", + "scripts": [], + "own_tokenizer": false }, { "name": "Yungay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Quechua, Lambayeque", "iso_1_code": "qu", "iso_3_code": "quf", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8587", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Quechua, Cajamarca", "iso_1_code": "qu", "iso_3_code": "qvc", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8588", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8586", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8585", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8558", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8530", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Ramu-Lower Sepik.json b/data/Ramu-Lower Sepik.json index d158553558149050cdabc168bf274eadf1436a50..b115f56f85a2bc34e61f96dbdb98afdb5b9ca844 100644 --- a/data/Ramu-Lower Sepik.json +++ b/data/Ramu-Lower Sepik.json @@ -2,408 +2,514 @@ "name": "Ramu-Lower Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kambot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ap Ma", "iso_1_code": null, "iso_3_code": "kbx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8590", + "scripts": [], + "own_tokenizer": false }, { "name": "Lower Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angoram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angoram", "iso_1_code": null, "iso_3_code": "aog", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8594", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8593", + "scripts": [], + "own_tokenizer": false }, { "name": "Chambri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chambri", "iso_1_code": null, "iso_3_code": "can", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8596", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8595", + "scripts": [], + "own_tokenizer": false }, { "name": "Karawari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karawari", "iso_1_code": null, "iso_3_code": "tzx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8598", + "scripts": [], + "own_tokenizer": false }, { "name": "Yimas", "iso_1_code": null, "iso_3_code": "yee", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8599", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8597", + "scripts": [], + "own_tokenizer": false }, { "name": "Nor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Murik", "iso_1_code": null, "iso_3_code": "mtf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8601", + "scripts": [], + "own_tokenizer": false }, { "name": "Kopar", "iso_1_code": null, "iso_3_code": "xop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8602", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8600", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8592", + "scripts": [], + "own_tokenizer": false }, { "name": "Ramu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Grass", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abu", "iso_1_code": null, "iso_3_code": "ado", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8605", + "scripts": [], + "own_tokenizer": false }, { "name": "Ambakich", "iso_1_code": null, "iso_3_code": "aew", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8606", + "scripts": [], + "own_tokenizer": false }, { "name": "Waran", "iso_1_code": null, "iso_3_code": "byz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8607", + "scripts": [], + "own_tokenizer": false }, { "name": "Gorovu", "iso_1_code": null, "iso_3_code": "grq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8608", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8604", + "scripts": [], + "own_tokenizer": false }, { "name": "Middle Ramu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aiome", "iso_1_code": null, "iso_3_code": "aki", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8610", + "scripts": [], + "own_tokenizer": false }, { "name": "Anor", "iso_1_code": null, "iso_3_code": "anj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8611", + "scripts": [], + "own_tokenizer": false }, { "name": "Rao", "iso_1_code": null, "iso_3_code": "rao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8612", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8609", + "scripts": [], + "own_tokenizer": false }, { "name": "Mikarew", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kire", "iso_1_code": null, "iso_3_code": "geb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8614", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Aruamu", "iso_1_code": null, "iso_3_code": "msy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8615", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Akukem", "iso_1_code": null, "iso_3_code": "spm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8616", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8613", + "scripts": [], + "own_tokenizer": false }, { "name": "Ottilien", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Borei", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mbore", "iso_1_code": null, "iso_3_code": "gai", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8619", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8618", + "scripts": [], + "own_tokenizer": false }, { "name": "Bosmun-Awar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awar", "iso_1_code": null, "iso_3_code": "aya", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8621", + "scripts": [], + "own_tokenizer": false }, { "name": "Bosmun", "iso_1_code": null, "iso_3_code": "bqs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8622", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8620", + "scripts": [], + "own_tokenizer": false }, { "name": "Watam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kayan", "iso_1_code": null, "iso_3_code": "kct", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8624", + "scripts": [], + "own_tokenizer": false }, { "name": "Marangis", "iso_1_code": null, "iso_3_code": "wax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8625", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8623", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8617", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamolan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chini", "iso_1_code": null, "iso_3_code": "afi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8627", + "scripts": [], + "own_tokenizer": false }, { "name": "Breri", "iso_1_code": null, "iso_3_code": "brq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8628", + "scripts": [], + "own_tokenizer": false }, { "name": "Igana", "iso_1_code": null, "iso_3_code": "igg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8629", + "scripts": [], + "own_tokenizer": false }, { "name": "Inapang", "iso_1_code": null, "iso_3_code": "mzu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8630", + "scripts": [], + "own_tokenizer": false }, { "name": "Romkun", "iso_1_code": null, "iso_3_code": "rmk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8631", + "scripts": [], + "own_tokenizer": false }, { "name": "Kominimung", "iso_1_code": null, "iso_3_code": "xoi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8632", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8626", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanggu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andarum", "iso_1_code": null, "iso_3_code": "aod", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8634", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanggape", "iso_1_code": null, "iso_3_code": "igm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8635", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanguat", "iso_1_code": null, "iso_3_code": "tbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8636", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanggu", "iso_1_code": null, "iso_3_code": "tgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8637", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8633", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8603", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8589", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Sahaptian.json b/data/Sahaptian.json index 07198b3c06284df309218dc9473f7250033a25a8..54eb8213907a01ec731cc4a205ac7d6523823480 100644 --- a/data/Sahaptian.json +++ b/data/Sahaptian.json @@ -2,57 +2,71 @@ "name": "Sahaptian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nez Perce", "iso_1_code": null, "iso_3_code": "nez", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8639", + "scripts": [], + "own_tokenizer": false }, { "name": "Sahaptin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tenino", "iso_1_code": null, "iso_3_code": "tqn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8641", + "scripts": [], + "own_tokenizer": false }, { "name": "Umatilla", "iso_1_code": null, "iso_3_code": "uma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8642", + "scripts": [], + "own_tokenizer": false }, { "name": "Walla Walla", "iso_1_code": null, "iso_3_code": "waa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8643", + "scripts": [], + "own_tokenizer": false }, { "name": "Yakama", "iso_1_code": null, "iso_3_code": "yak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8644", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8638", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Salish.json b/data/Salish.json index 2e4fc9934f445693ac6a60b5e0f30109120401da..cdb40297dbed6f5c97a9ee505f17ebf811406c31 100644 --- a/data/Salish.json +++ b/data/Salish.json @@ -2,305 +2,379 @@ "name": "Salish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bella Coola", "iso_1_code": null, "iso_3_code": "blc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8646", + "scripts": [], + "own_tokenizer": false }, { "name": "Tillamook", "iso_1_code": null, "iso_3_code": "til", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8647", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Salish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Clallam", "iso_1_code": null, "iso_3_code": "clm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8649", + "scripts": [], + "own_tokenizer": false }, { "name": "Comox", "iso_1_code": null, "iso_3_code": "coo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8650", + "scripts": [], + "own_tokenizer": false }, { "name": "Halkomelem", "iso_1_code": null, "iso_3_code": "hur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8651", + "scripts": [], + "own_tokenizer": false }, { "name": "Nooksack", "iso_1_code": null, "iso_3_code": "nok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8652", + "scripts": [], + "own_tokenizer": false }, { "name": "Pentlatch", "iso_1_code": null, "iso_3_code": "ptw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8653", + "scripts": [], + "own_tokenizer": false }, { "name": "Sechelt", "iso_1_code": null, "iso_3_code": "sec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8654", + "scripts": [], + "own_tokenizer": false }, { "name": "Squamish", "iso_1_code": null, "iso_3_code": "squ", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8655", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Straits Salish", "iso_1_code": null, "iso_3_code": "str", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8656", + "scripts": [], + "own_tokenizer": false }, { "name": "Twana", "iso_1_code": null, "iso_3_code": "twa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8657", + "scripts": [], + "own_tokenizer": false }, { "name": "Lushootseed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lushootseed", "iso_1_code": null, "iso_3_code": "lut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8659", + "scripts": [], + "own_tokenizer": false }, { "name": "Skagit", "iso_1_code": null, "iso_3_code": "ska", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8660", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Lushootseed", "iso_1_code": null, "iso_3_code": "slh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8661", + "scripts": [], + "own_tokenizer": false }, { "name": "Snohomish", "iso_1_code": null, "iso_3_code": "sno", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8662", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8658", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8648", + "scripts": [], + "own_tokenizer": false }, { "name": "Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lillooet", "iso_1_code": null, "iso_3_code": "lil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8664", + "scripts": [], + "own_tokenizer": false }, { "name": "Shuswap", "iso_1_code": null, "iso_3_code": "shs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8665", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Thompson", "iso_1_code": null, "iso_3_code": "thp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8667", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8666", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Columbia-Wenatchi", "iso_1_code": null, "iso_3_code": "col", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8669", + "scripts": [], + "own_tokenizer": false }, { "name": "Coeur d\u2019Alene", "iso_1_code": null, "iso_3_code": "crd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8670", + "scripts": [], + "own_tokenizer": false }, { "name": "Okanagan", "iso_1_code": null, "iso_3_code": "oka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8671", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalispel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalispel-Pend d\u2019Oreille", "iso_1_code": null, "iso_3_code": "fla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8673", + "scripts": [], + "own_tokenizer": false }, { "name": "Spokane", "iso_1_code": null, "iso_3_code": "spo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8674", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8672", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8663", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsamosan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Inland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chehalis, Upper", "iso_1_code": null, "iso_3_code": "cjh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8677", + "scripts": [], + "own_tokenizer": false }, { "name": "Cowlitz", "iso_1_code": null, "iso_3_code": "cow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8678", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8676", + "scripts": [], + "own_tokenizer": false }, { "name": "Maritime", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chehalis, Lower", "iso_1_code": null, "iso_3_code": "cea", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8680", + "scripts": [], + "own_tokenizer": false }, { "name": "Quinault", "iso_1_code": null, "iso_3_code": "qun", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8681", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8679", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8675", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8645", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Senagi.json b/data/Senagi.json index 2832c260d3c80465af9c9e0c6e42f930fef1a572..c5bc3b702a172b5134604da8456636a5816b4e0b 100644 --- a/data/Senagi.json +++ b/data/Senagi.json @@ -2,24 +2,32 @@ "name": "Senagi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angor", "iso_1_code": null, "iso_3_code": "agg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8683", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dla", "iso_1_code": null, "iso_3_code": "kbv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8684", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8682", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Sepik.json b/data/Sepik.json index 68fd4b3a904106c10937c84456dee940f75a760a..a606f70752ca6588a24276c5339f2118ee571736 100644 --- a/data/Sepik.json +++ b/data/Sepik.json @@ -2,575 +2,737 @@ "name": "Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abau", "iso_1_code": null, "iso_3_code": "aau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8687", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8686", + "scripts": [], + "own_tokenizer": false }, { "name": "Iwam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amal", "iso_1_code": null, "iso_3_code": "aad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8689", + "scripts": [], + "own_tokenizer": false }, { "name": "Iwam", "iso_1_code": null, "iso_3_code": "iwm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8690", + "scripts": [], + "own_tokenizer": false }, { "name": "Iwam, Sepik", "iso_1_code": null, "iso_3_code": "iws", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8691", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8688", + "scripts": [], + "own_tokenizer": false }, { "name": "Leonhard Schultze", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pefiyahe", "iso_1_code": null, "iso_3_code": "ppq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8693", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuwari", "iso_1_code": null, "iso_3_code": "tww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8694", + "scripts": [], + "own_tokenizer": false }, { "name": "Walio", "iso_1_code": null, "iso_3_code": "wla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8695", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawiyo", "iso_1_code": null, "iso_3_code": "ybx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8696", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8692", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ambulas", "iso_1_code": null, "iso_3_code": "abt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8698", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Burui", "iso_1_code": null, "iso_3_code": "bry", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8699", + "scripts": [], + "own_tokenizer": false }, { "name": "Boikin", "iso_1_code": null, "iso_3_code": "bzf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8700", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaikundi", "iso_1_code": null, "iso_3_code": "gbf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8701", + "scripts": [], + "own_tokenizer": false }, { "name": "Iatmul", "iso_1_code": null, "iso_3_code": "ian", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8702", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mevembet", "iso_1_code": null, "iso_3_code": "keh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8703", + "scripts": [], + "own_tokenizer": false }, { "name": "Koiwat", "iso_1_code": null, "iso_3_code": "kxt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8704", + "scripts": [], + "own_tokenizer": false }, { "name": "Manambu", "iso_1_code": null, "iso_3_code": "mle", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8705", + "scripts": [], + "own_tokenizer": false }, { "name": "Gala", "iso_1_code": null, "iso_3_code": "nud", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8706", + "scripts": [], + "own_tokenizer": false }, { "name": "Sos Kundi", "iso_1_code": null, "iso_3_code": "sdk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8707", + "scripts": [], + "own_tokenizer": false }, { "name": "Sengo", "iso_1_code": null, "iso_3_code": "spk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8708", + "scripts": [], + "own_tokenizer": false }, { "name": "Hanga Hundi", "iso_1_code": null, "iso_3_code": "wos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8709", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yalaku", "iso_1_code": null, "iso_3_code": "ylg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8710", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8697", + "scripts": [], + "own_tokenizer": false }, { "name": "Nukuma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kwoma", "iso_1_code": null, "iso_3_code": "kmo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8712", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kwanga", "iso_1_code": null, "iso_3_code": "kwj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8713", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mende", "iso_1_code": null, "iso_3_code": "sim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8714", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8711", + "scripts": [], + "own_tokenizer": false }, { "name": "Ram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pouye", "iso_1_code": null, "iso_3_code": "bye", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8716", + "scripts": [], + "own_tokenizer": false }, { "name": "Awtuw", "iso_1_code": null, "iso_3_code": "kmn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8717", + "scripts": [], + "own_tokenizer": false }, { "name": "Karawa", "iso_1_code": null, "iso_3_code": "xrw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8718", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8715", + "scripts": [], + "own_tokenizer": false }, { "name": "Sepik Hill", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alamblak", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alamblak", "iso_1_code": null, "iso_3_code": "amp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8721", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaningra", "iso_1_code": null, "iso_3_code": "knr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8722", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8720", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahinemo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Berinomo", "iso_1_code": null, "iso_3_code": "bit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8724", + "scripts": [], + "own_tokenizer": false }, { "name": "Bahinemo", "iso_1_code": null, "iso_3_code": "bjh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8725", + "scripts": [], + "own_tokenizer": false }, { "name": "Bisis", "iso_1_code": null, "iso_3_code": "bnw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8726", + "scripts": [], + "own_tokenizer": false }, { "name": "Kapriman", "iso_1_code": null, "iso_3_code": "dju", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8727", + "scripts": [], + "own_tokenizer": false }, { "name": "Mari", "iso_1_code": null, "iso_3_code": "mbx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8728", + "scripts": [], + "own_tokenizer": false }, { "name": "Sumariup", "iso_1_code": null, "iso_3_code": "siv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8729", + "scripts": [], + "own_tokenizer": false }, { "name": "Watakataui", "iso_1_code": null, "iso_3_code": "wtk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8730", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8723", + "scripts": [], + "own_tokenizer": false }, { "name": "Papi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baiyamo", "iso_1_code": null, "iso_3_code": "ppe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8732", + "scripts": [], + "own_tokenizer": false }, { "name": "Asaba", "iso_1_code": null, "iso_3_code": "seo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8733", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8731", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Niksek", "iso_1_code": null, "iso_3_code": "gbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8735", + "scripts": [], + "own_tokenizer": false }, { "name": "Hewa", "iso_1_code": null, "iso_3_code": "ham", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8736", + "scripts": [], + "own_tokenizer": false }, { "name": "Piame", "iso_1_code": null, "iso_3_code": "pin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8737", + "scripts": [], + "own_tokenizer": false }, { "name": "Saniyo-Hiyewe", "iso_1_code": null, "iso_3_code": "sny", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8738", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8734", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8719", + "scripts": [], + "own_tokenizer": false }, { "name": "Tama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ayi", "iso_1_code": null, "iso_3_code": "ayq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8740", + "scripts": [], + "own_tokenizer": false }, { "name": "Pahi", "iso_1_code": null, "iso_3_code": "lgt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8741", + "scripts": [], + "own_tokenizer": false }, { "name": "Mehek", "iso_1_code": null, "iso_3_code": "nux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8742", + "scripts": [], + "own_tokenizer": false }, { "name": "Pasi", "iso_1_code": null, "iso_3_code": "psq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8743", + "scripts": [], + "own_tokenizer": false }, { "name": "Yessan-Mayo", "iso_1_code": null, "iso_3_code": "yss", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8744", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kalou", "iso_1_code": null, "iso_3_code": "ywa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8745", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8739", + "scripts": [], + "own_tokenizer": false }, { "name": "Wogamusin-Chenapian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chenapian", "iso_1_code": null, "iso_3_code": "cjn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8747", + "scripts": [], + "own_tokenizer": false }, { "name": "Wogamusin", "iso_1_code": null, "iso_3_code": "wog", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8748", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8746", + "scripts": [], + "own_tokenizer": false }, { "name": "Yellow River", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ak", "iso_1_code": null, "iso_3_code": "akq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8750", + "scripts": [], + "own_tokenizer": false }, { "name": "Awun", "iso_1_code": null, "iso_3_code": "aww", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8751", + "scripts": [], + "own_tokenizer": false }, { "name": "Namia", "iso_1_code": null, "iso_3_code": "nnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8752", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8749", + "scripts": [], + "own_tokenizer": false }, { "name": "Yerakai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yerakai", "iso_1_code": null, "iso_3_code": "yra", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8753", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8685", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Sign language.json b/data/Sign language.json index 7f5294cd16afa47bd18d18f6e3fe683145783725..2471696652cd1474aa7b7a0c7286ac8d6a26e6b4 100644 --- a/data/Sign language.json +++ b/data/Sign language.json @@ -2,1306 +2,1632 @@ "name": "Sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "International Sign", "iso_1_code": null, "iso_3_code": "ils", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8756", + "scripts": [], + "own_tokenizer": false }, { "name": "Deaf community sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Argentine Sign Language", "iso_1_code": null, "iso_3_code": "aed", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8758", + "scripts": [], + "own_tokenizer": false }, { "name": "Armenian Sign Language", "iso_1_code": null, "iso_3_code": "aen", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8759", + "scripts": [], + "own_tokenizer": false }, { "name": "Afghan Sign Language", "iso_1_code": null, "iso_3_code": "afg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8760", + "scripts": [], + "own_tokenizer": false }, { "name": "American Sign Language", "iso_1_code": null, "iso_3_code": "ase", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8761", + "scripts": [], + "own_tokenizer": false }, { "name": "Auslan", "iso_1_code": null, "iso_3_code": "asf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8762", + "scripts": [], + "own_tokenizer": false }, { "name": "Algerian Sign Language", "iso_1_code": null, "iso_3_code": "asp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8763", + "scripts": [], + "own_tokenizer": false }, { "name": "Austrian Sign Language", "iso_1_code": null, "iso_3_code": "asq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8764", + "scripts": [], + "own_tokenizer": false }, { "name": "British Sign Language", "iso_1_code": null, "iso_3_code": "bfi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8765", + "scripts": [], + "own_tokenizer": false }, { "name": "Malian Sign Language", "iso_1_code": null, "iso_3_code": "bog", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8766", + "scripts": [], + "own_tokenizer": false }, { "name": "Bulgarian Sign Language", "iso_1_code": null, "iso_3_code": "bqn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8767", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolivian Sign Language", "iso_1_code": null, "iso_3_code": "bvl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8768", + "scripts": [], + "own_tokenizer": false }, { "name": "Brazilian Sign Language", "iso_1_code": null, "iso_3_code": "bzs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8769", + "scripts": [], + "own_tokenizer": false }, { "name": "Chadian Sign Language", "iso_1_code": null, "iso_3_code": "cds", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8770", + "scripts": [], + "own_tokenizer": false }, { "name": "Catalan Sign Language", "iso_1_code": null, "iso_3_code": "csc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8771", + "scripts": [], + "own_tokenizer": false }, { "name": "Chiangmai Sign Language", "iso_1_code": null, "iso_3_code": "csd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8772", + "scripts": [], + "own_tokenizer": false }, { "name": "Czech Sign Language", "iso_1_code": null, "iso_3_code": "cse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8773", + "scripts": [], + "own_tokenizer": false }, { "name": "Cuban Sign Language", "iso_1_code": null, "iso_3_code": "csf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8774", + "scripts": [], + "own_tokenizer": false }, { "name": "Chilean Sign Language", "iso_1_code": null, "iso_3_code": "csg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8775", + "scripts": [], + "own_tokenizer": false }, { "name": "Chinese Sign Language", "iso_1_code": null, "iso_3_code": "csl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8776", + "scripts": [], + "own_tokenizer": false }, { "name": "Colombian Sign Language", "iso_1_code": null, "iso_3_code": "csn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8777", + "scripts": [], + "own_tokenizer": false }, { "name": "Croatian Sign Language", "iso_1_code": null, "iso_3_code": "csq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8778", + "scripts": [], + "own_tokenizer": false }, { "name": "Costa Rican Sign Language", "iso_1_code": null, "iso_3_code": "csr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8779", + "scripts": [], + "own_tokenizer": false }, { "name": "Cambodian Sign Language", "iso_1_code": null, "iso_3_code": "csx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8780", + "scripts": [], + "own_tokenizer": false }, { "name": "Dominican Sign Language", "iso_1_code": null, "iso_3_code": "doq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8781", + "scripts": [], + "own_tokenizer": false }, { "name": "Sign Language of the Netherlands", "iso_1_code": null, "iso_3_code": "dse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8782", + "scripts": [], + "own_tokenizer": false }, { "name": "Danish Sign Language", "iso_1_code": null, "iso_3_code": "dsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8783", + "scripts": [], + "own_tokenizer": false }, { "name": "Ecuadorian Sign Language", "iso_1_code": null, "iso_3_code": "ecs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8784", + "scripts": [], + "own_tokenizer": false }, { "name": "Egyptian Sign Language", "iso_1_code": null, "iso_3_code": "esl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8785", + "scripts": [], + "own_tokenizer": false }, { "name": "Salvadoran Sign Language", "iso_1_code": null, "iso_3_code": "esn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8786", + "scripts": [], + "own_tokenizer": false }, { "name": "Estonian Sign Language", "iso_1_code": null, "iso_3_code": "eso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8787", + "scripts": [], + "own_tokenizer": false }, { "name": "Ethiopian Sign Language", "iso_1_code": null, "iso_3_code": "eth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8788", + "scripts": [], + "own_tokenizer": false }, { "name": "Quebec Sign Language", "iso_1_code": null, "iso_3_code": "fcs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8789", + "scripts": [], + "own_tokenizer": false }, { "name": "Finnish Sign Language", "iso_1_code": null, "iso_3_code": "fse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8790", + "scripts": [], + "own_tokenizer": false }, { "name": "French Sign Language", "iso_1_code": null, "iso_3_code": "fsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8791", + "scripts": [], + "own_tokenizer": false }, { "name": "Finland-Swedish Sign Language", "iso_1_code": null, "iso_3_code": "fss", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8792", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghanaian Sign Language", "iso_1_code": null, "iso_3_code": "gse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8793", + "scripts": [], + "own_tokenizer": false }, { "name": "German Sign Language", "iso_1_code": null, "iso_3_code": "gsg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8794", + "scripts": [], + "own_tokenizer": false }, { "name": "Guatemalan Sign Language", "iso_1_code": null, "iso_3_code": "gsm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8795", + "scripts": [], + "own_tokenizer": false }, { "name": "Greek Sign Language", "iso_1_code": null, "iso_3_code": "gss", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8796", + "scripts": [], + "own_tokenizer": false }, { "name": "Guinean Sign Language", "iso_1_code": null, "iso_3_code": "gus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8797", + "scripts": [], + "own_tokenizer": false }, { "name": "Hanoi Sign Language", "iso_1_code": null, "iso_3_code": "hab", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8798", + "scripts": [], + "own_tokenizer": false }, { "name": "Haiphong Sign Language", "iso_1_code": null, "iso_3_code": "haf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8799", + "scripts": [], + "own_tokenizer": false }, { "name": "Honduran Sign Language", "iso_1_code": null, "iso_3_code": "hds", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8800", + "scripts": [], + "own_tokenizer": false }, { "name": "Hong Kong Sign Language", "iso_1_code": null, "iso_3_code": "hks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8801", + "scripts": [], + "own_tokenizer": false }, { "name": "Ho Chi Minh City Sign Language", "iso_1_code": null, "iso_3_code": "hos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8802", + "scripts": [], + "own_tokenizer": false }, { "name": "Hawai\u2018i Sign Language", "iso_1_code": null, "iso_3_code": "hps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8803", + "scripts": [], + "own_tokenizer": false }, { "name": "Hungarian Sign Language", "iso_1_code": null, "iso_3_code": "hsh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8804", + "scripts": [], + "own_tokenizer": false }, { "name": "Hausa Sign Language", "iso_1_code": null, "iso_3_code": "hsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8805", + "scripts": [], + "own_tokenizer": false }, { "name": "Icelandic Sign Language", "iso_1_code": null, "iso_3_code": "icl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8806", + "scripts": [], + "own_tokenizer": false }, { "name": "Indonesian Sign Language", "iso_1_code": null, "iso_3_code": "inl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8807", + "scripts": [], + "own_tokenizer": false }, { "name": "Indian Sign Language", "iso_1_code": null, "iso_3_code": "ins", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8808", + "scripts": [], + "own_tokenizer": false }, { "name": "Italian Sign Language", "iso_1_code": null, "iso_3_code": "ise", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8809", + "scripts": [], + "own_tokenizer": false }, { "name": "Irish Sign Language", "iso_1_code": null, "iso_3_code": "isg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8810", + "scripts": [], + "own_tokenizer": false }, { "name": "Israeli Sign Language", "iso_1_code": null, "iso_3_code": "isr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8811", + "scripts": [], + "own_tokenizer": false }, { "name": "Jamaican Sign Language", "iso_1_code": null, "iso_3_code": "jls", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8812", + "scripts": [], + "own_tokenizer": false }, { "name": "Jordanian Sign Language", "iso_1_code": null, "iso_3_code": "jos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8813", + "scripts": [], + "own_tokenizer": false }, { "name": "Japanese Sign Language", "iso_1_code": null, "iso_3_code": "jsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8814", + "scripts": [], + "own_tokenizer": false }, { "name": "Selangor Sign Language", "iso_1_code": null, "iso_3_code": "kgi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8815", + "scripts": [], + "own_tokenizer": false }, { "name": "Korean Sign Language", "iso_1_code": null, "iso_3_code": "kvk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8816", + "scripts": [], + "own_tokenizer": false }, { "name": "Libyan Sign Language", "iso_1_code": null, "iso_3_code": "lbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8817", + "scripts": [], + "own_tokenizer": false }, { "name": "Guinea-Bissau Sign Language", "iso_1_code": null, "iso_3_code": "lgs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8818", + "scripts": [], + "own_tokenizer": false }, { "name": "Lithuanian Sign Language", "iso_1_code": null, "iso_3_code": "lls", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8819", + "scripts": [], + "own_tokenizer": false }, { "name": "Burundian Sign Language", "iso_1_code": null, "iso_3_code": "lsb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8820", + "scripts": [], + "own_tokenizer": false }, { "name": "Latvian Sign Language", "iso_1_code": null, "iso_3_code": "lsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8821", + "scripts": [], + "own_tokenizer": false }, { "name": "Tibetan Sign Language", "iso_1_code": null, "iso_3_code": "lsn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8822", + "scripts": [], + "own_tokenizer": false }, { "name": "Laos Sign Language", "iso_1_code": null, "iso_3_code": "lso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8823", + "scripts": [], + "own_tokenizer": false }, { "name": "Panamanian Sign Language", "iso_1_code": null, "iso_3_code": "lsp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8824", + "scripts": [], + "own_tokenizer": false }, { "name": "Trinidad and Tobago Sign Language", "iso_1_code": null, "iso_3_code": "lst", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8825", + "scripts": [], + "own_tokenizer": false }, { "name": "Seychelles Sign Language", "iso_1_code": null, "iso_3_code": "lsw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8826", + "scripts": [], + "own_tokenizer": false }, { "name": "Mauritian Sign Language", "iso_1_code": null, "iso_3_code": "lsy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8827", + "scripts": [], + "own_tokenizer": false }, { "name": "Malawian Sign Language", "iso_1_code": null, "iso_3_code": "lws", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8828", + "scripts": [], + "own_tokenizer": false }, { "name": "Maltese Sign Language", "iso_1_code": null, "iso_3_code": "mdl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8829", + "scripts": [], + "own_tokenizer": false }, { "name": "Mexican Sign Language", "iso_1_code": null, "iso_3_code": "mfs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8830", + "scripts": [], + "own_tokenizer": false }, { "name": "Mongolian Sign Language", "iso_1_code": null, "iso_3_code": "msr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8831", + "scripts": [], + "own_tokenizer": false }, { "name": "Malagasy Sign Language", "iso_1_code": null, "iso_3_code": "mzc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8832", + "scripts": [], + "own_tokenizer": false }, { "name": "Mozambican Sign Language", "iso_1_code": null, "iso_3_code": "mzy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8833", + "scripts": [], + "own_tokenizer": false }, { "name": "Namibian Sign Language", "iso_1_code": null, "iso_3_code": "nbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8834", + "scripts": [], + "own_tokenizer": false }, { "name": "Nicaraguan Sign Language", "iso_1_code": null, "iso_3_code": "ncs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8835", + "scripts": [], + "own_tokenizer": false }, { "name": "Nigerian Sign Language", "iso_1_code": null, "iso_3_code": "nsi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8836", + "scripts": [], + "own_tokenizer": false }, { "name": "Norwegian Sign Language", "iso_1_code": null, "iso_3_code": "nsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8837", + "scripts": [], + "own_tokenizer": false }, { "name": "Nepalese Sign Language", "iso_1_code": null, "iso_3_code": "nsp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8838", + "scripts": [], + "own_tokenizer": false }, { "name": "Maritime Sign Language", "iso_1_code": null, "iso_3_code": "nsr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8839", + "scripts": [], + "own_tokenizer": false }, { "name": "New Zealand Sign Language", "iso_1_code": null, "iso_3_code": "nzs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8840", + "scripts": [], + "own_tokenizer": false }, { "name": "Papua New Guinean Sign Language", "iso_1_code": null, "iso_3_code": "pgz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8841", + "scripts": [], + "own_tokenizer": false }, { "name": "Pakistan Sign Language", "iso_1_code": null, "iso_3_code": "pks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8842", + "scripts": [], + "own_tokenizer": false }, { "name": "Peruvian Sign Language", "iso_1_code": null, "iso_3_code": "prl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8843", + "scripts": [], + "own_tokenizer": false }, { "name": "Iranian Sign Language", "iso_1_code": null, "iso_3_code": "psc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8844", + "scripts": [], + "own_tokenizer": false }, { "name": "Penang Sign Language", "iso_1_code": null, "iso_3_code": "psg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8845", + "scripts": [], + "own_tokenizer": false }, { "name": "Puerto Rican Sign Language", "iso_1_code": null, "iso_3_code": "psl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8846", + "scripts": [], + "own_tokenizer": false }, { "name": "Polish Sign Language", "iso_1_code": null, "iso_3_code": "pso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8847", + "scripts": [], + "own_tokenizer": false }, { "name": "Filipino Sign Language", "iso_1_code": null, "iso_3_code": "psp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8848", + "scripts": [], + "own_tokenizer": false }, { "name": "Portuguese Sign Language", "iso_1_code": null, "iso_3_code": "psr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8849", + "scripts": [], + "own_tokenizer": false }, { "name": "Paraguayan Sign Language", "iso_1_code": null, "iso_3_code": "pys", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8850", + "scripts": [], + "own_tokenizer": false }, { "name": "Romanian Sign Language", "iso_1_code": null, "iso_3_code": "rms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8851", + "scripts": [], + "own_tokenizer": false }, { "name": "Russian Sign Language", "iso_1_code": null, "iso_3_code": "rsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8852", + "scripts": [], + "own_tokenizer": false }, { "name": "Rwandan Sign Language", "iso_1_code": null, "iso_3_code": "rsn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8853", + "scripts": [], + "own_tokenizer": false }, { "name": "Saudi Arabian Sign Language", "iso_1_code": null, "iso_3_code": "sdl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8854", + "scripts": [], + "own_tokenizer": false }, { "name": "French Belgian Sign Language", "iso_1_code": null, "iso_3_code": "sfb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8855", + "scripts": [], + "own_tokenizer": false }, { "name": "South African Sign Language", "iso_1_code": null, "iso_3_code": "sfs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8856", + "scripts": [], + "own_tokenizer": false }, { "name": "Swiss-German Sign Language", "iso_1_code": null, "iso_3_code": "sgg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8857", + "scripts": [], + "own_tokenizer": false }, { "name": "Sierra Leone Sign Language", "iso_1_code": null, "iso_3_code": "sgx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8858", + "scripts": [], + "own_tokenizer": false }, { "name": "Swiss-Italian Sign Language", "iso_1_code": null, "iso_3_code": "slf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8859", + "scripts": [], + "own_tokenizer": false }, { "name": "Singapore Sign Language", "iso_1_code": null, "iso_3_code": "sls", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8860", + "scripts": [], + "own_tokenizer": false }, { "name": "Albanian Sign Language", "iso_1_code": null, "iso_3_code": "sqk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8861", + "scripts": [], + "own_tokenizer": false }, { "name": "Sri Lankan Sign Language", "iso_1_code": null, "iso_3_code": "sqs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8862", + "scripts": [], + "own_tokenizer": false }, { "name": "Spanish Sign Language", "iso_1_code": null, "iso_3_code": "ssp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8863", + "scripts": [], + "own_tokenizer": false }, { "name": "Swiss-French Sign Language", "iso_1_code": null, "iso_3_code": "ssr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8864", + "scripts": [], + "own_tokenizer": false }, { "name": "Slovakian Sign Language", "iso_1_code": null, "iso_3_code": "svk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8865", + "scripts": [], + "own_tokenizer": false }, { "name": "Swedish Sign Language", "iso_1_code": null, "iso_3_code": "swl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8866", + "scripts": [], + "own_tokenizer": false }, { "name": "Solomon Islands Sign Language", "iso_1_code": null, "iso_3_code": "szs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8867", + "scripts": [], + "own_tokenizer": false }, { "name": "Tunisian Sign Language", "iso_1_code": null, "iso_3_code": "tse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8868", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkish Sign Language", "iso_1_code": null, "iso_3_code": "tsm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8869", + "scripts": [], + "own_tokenizer": false }, { "name": "Thai Sign Language", "iso_1_code": null, "iso_3_code": "tsq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8870", + "scripts": [], + "own_tokenizer": false }, { "name": "Taiwan Sign Language", "iso_1_code": null, "iso_3_code": "tss", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8871", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanzanian Sign Language", "iso_1_code": null, "iso_3_code": "tza", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8872", + "scripts": [], + "own_tokenizer": false }, { "name": "Ugandan Sign Language", "iso_1_code": null, "iso_3_code": "ugn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8873", + "scripts": [], + "own_tokenizer": false }, { "name": "Uruguayan Sign Language", "iso_1_code": null, "iso_3_code": "ugy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8874", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukrainian Sign Language", "iso_1_code": null, "iso_3_code": "ukl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8875", + "scripts": [], + "own_tokenizer": false }, { "name": "Flemish Sign Language", "iso_1_code": null, "iso_3_code": "vgt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8876", + "scripts": [], + "own_tokenizer": false }, { "name": "Moldova Sign Language", "iso_1_code": null, "iso_3_code": "vsi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8877", + "scripts": [], + "own_tokenizer": false }, { "name": "Venezuelan Sign Language", "iso_1_code": null, "iso_3_code": "vsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8878", + "scripts": [], + "own_tokenizer": false }, { "name": "Valencian Sign Language", "iso_1_code": null, "iso_3_code": "vsv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8879", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bengal Sign Language", "iso_1_code": null, "iso_3_code": "wbs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8880", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenyan Sign Language", "iso_1_code": null, "iso_3_code": "xki", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8881", + "scripts": [], + "own_tokenizer": false }, { "name": "Malaysian Sign Language", "iso_1_code": null, "iso_3_code": "xml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8882", + "scripts": [], + "own_tokenizer": false }, { "name": "Moroccan Sign Language", "iso_1_code": null, "iso_3_code": "xms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8883", + "scripts": [], + "own_tokenizer": false }, { "name": "Slovenian Sign Language", "iso_1_code": null, "iso_3_code": "ysl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8884", + "scripts": [], + "own_tokenizer": false }, { "name": "Myanmar Sign Language", "iso_1_code": null, "iso_3_code": "ysm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8885", + "scripts": [], + "own_tokenizer": false }, { "name": "Zimbabwe Sign Language", "iso_1_code": null, "iso_3_code": "zib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8886", + "scripts": [], + "own_tokenizer": false }, { "name": "Zambian Sign Language", "iso_1_code": null, "iso_3_code": "zsl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8887", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8757", + "scripts": [], + "own_tokenizer": false }, { "name": "Shared sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Adamorobe Sign Language", "iso_1_code": null, "iso_3_code": "ads", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8889", + "scripts": [], + "own_tokenizer": false }, { "name": "Algerian Jewish Sign Language", "iso_1_code": null, "iso_3_code": "ajs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8890", + "scripts": [], + "own_tokenizer": false }, { "name": "Australian Aborigines Sign Language", "iso_1_code": null, "iso_3_code": "asw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8891", + "scripts": [], + "own_tokenizer": false }, { "name": "Ban Khor Sign Language", "iso_1_code": null, "iso_3_code": "bfk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8892", + "scripts": [], + "own_tokenizer": false }, { "name": "Kata Kolok", "iso_1_code": null, "iso_3_code": "bqy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8893", + "scripts": [], + "own_tokenizer": false }, { "name": "Mardin Sign Language", "iso_1_code": null, "iso_3_code": "dsz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8894", + "scripts": [], + "own_tokenizer": false }, { "name": "Miyakubo Sign Language", "iso_1_code": null, "iso_3_code": "ehs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8895", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghandruk Sign Language", "iso_1_code": null, "iso_3_code": "gds", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8896", + "scripts": [], + "own_tokenizer": false }, { "name": "Inuit Sign Language", "iso_1_code": null, "iso_3_code": "iks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8897", + "scripts": [], + "own_tokenizer": false }, { "name": "Konchri Sain", "iso_1_code": null, "iso_3_code": "jcs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8898", + "scripts": [], + "own_tokenizer": false }, { "name": "Jhyankot Sign Language", "iso_1_code": null, "iso_3_code": "jhs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8899", + "scripts": [], + "own_tokenizer": false }, { "name": "Amami Koniya Sign Language", "iso_1_code": null, "iso_3_code": "jks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8900", + "scripts": [], + "own_tokenizer": false }, { "name": "Jumli Sign Language", "iso_1_code": null, "iso_3_code": "jus", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8901", + "scripts": [], + "own_tokenizer": false }, { "name": "Albarradas Sign Language", "iso_1_code": null, "iso_3_code": "lsc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8902", + "scripts": [], + "own_tokenizer": false }, { "name": "Sivia Sign Language", "iso_1_code": null, "iso_3_code": "lsv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8903", + "scripts": [], + "own_tokenizer": false }, { "name": "Martha\u2019s Vineyard Sign Language", "iso_1_code": null, "iso_3_code": "mre", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8904", + "scripts": [], + "own_tokenizer": false }, { "name": "Yucatec Maya Sign Language", "iso_1_code": null, "iso_3_code": "msd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8905", + "scripts": [], + "own_tokenizer": false }, { "name": "Old Kentish Sign Language", "iso_1_code": null, "iso_3_code": "okl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8906", + "scripts": [], + "own_tokenizer": false }, { "name": "Providencia Sign Language", "iso_1_code": null, "iso_3_code": "prz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8907", + "scripts": [], + "own_tokenizer": false }, { "name": "Plains Indian Sign Language", "iso_1_code": null, "iso_3_code": "psd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8908", + "scripts": [], + "own_tokenizer": false }, { "name": "Bribri Sign Language", "iso_1_code": null, "iso_3_code": "rib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8909", + "scripts": [], + "own_tokenizer": false }, { "name": "Brunca Sign Language", "iso_1_code": null, "iso_3_code": "rnb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8910", + "scripts": [], + "own_tokenizer": false }, { "name": "Miriwoong Sign Language", "iso_1_code": null, "iso_3_code": "rsm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8911", + "scripts": [], + "own_tokenizer": false }, { "name": "Kufr Qassem Sign Language", "iso_1_code": null, "iso_3_code": "sqx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8912", + "scripts": [], + "own_tokenizer": false }, { "name": "Al-Sayyid Bedouin Sign Language", "iso_1_code": null, "iso_3_code": "syy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8913", + "scripts": [], + "own_tokenizer": false }, { "name": "Tebul Sign Language", "iso_1_code": null, "iso_3_code": "tsy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8914", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaapor Sign Language", "iso_1_code": null, "iso_3_code": "uks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8915", + "scripts": [], + "own_tokenizer": false }, { "name": "Yolngu Sign Language", "iso_1_code": null, "iso_3_code": "ygs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8916", + "scripts": [], + "own_tokenizer": false }, { "name": "Yan-nhangu Sign Language", "iso_1_code": null, "iso_3_code": "yhs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8917", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8888", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8755", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Sino-Tibetan.json b/data/Sino-Tibetan.json index f32c65f192a8fc9c8ff5f690abf39e1ce3089be7..713d93368f64d15379fce29353cced7a84771094 100644 --- a/data/Sino-Tibetan.json +++ b/data/Sino-Tibetan.json @@ -2,6273 +2,9643 @@ "name": "Sino-Tibetan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Chinese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Chinese, Min Dong", "iso_1_code": "zh", "iso_3_code": "cdo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8920", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Jinyu", "iso_1_code": "zh", "iso_3_code": "cjy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8921", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Mandarin", "iso_1_code": "zh", "iso_3_code": "cmn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8922", + "scripts": [ + "Hani" + ], + "own_tokenizer": true }, { "name": "Pinghua, Northern", "iso_1_code": "zh", "iso_3_code": "cnp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8923", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Pu-Xian", "iso_1_code": "zh", "iso_3_code": "cpx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8924", + "scripts": [], + "own_tokenizer": true }, { "name": "Pinghua, Southern", "iso_1_code": "zh", "iso_3_code": "csp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8925", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Huizhou", "iso_1_code": "zh", "iso_3_code": "czh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8926", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Min Zhong", "iso_1_code": "zh", "iso_3_code": "czo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8927", + "scripts": [], + "own_tokenizer": true }, { "name": "Dungan", "iso_1_code": null, "iso_3_code": "dng", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8928", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Chinese, Gan", "iso_1_code": "zh", "iso_3_code": "gan", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8929", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Hakka", "iso_1_code": "zh", "iso_3_code": "hak", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8930", + "scripts": [ + "Latn", + "Hani" + ], + "own_tokenizer": true }, { "name": "Chinese, Xiang", "iso_1_code": "zh", "iso_3_code": "hsn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8931", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Classical", "iso_1_code": "zh", "iso_3_code": "lzh", - "tokenizer": { - "name": "literary_chinese", - "tokenizer": "StanzaTokenizer(\"lzh\")" + "tokenizers": { + "Hani": { + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "scripts": [ + "Hani" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "8932", + "scripts": [ + "Hani" + ], + "own_tokenizer": true }, { "name": "Chinese, Min Bei", "iso_1_code": "zh", "iso_3_code": "mnp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8933", + "scripts": [], + "own_tokenizer": true }, { "name": "Chinese, Min Nan", "iso_1_code": "zh", "iso_3_code": "nan", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8934", + "scripts": [ + "Latn", + "Hani" + ], + "own_tokenizer": true }, { "name": "Chinese, Wu", "iso_1_code": "zh", "iso_3_code": "wuu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8935", + "scripts": [ + "Hani" + ], + "own_tokenizer": true }, { "name": "Chinese, Yue", "iso_1_code": "zh", "iso_3_code": "yue", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "8936", + "scripts": [ + "Hani" + ], + "own_tokenizer": true } - ] + ], + "node_i": "8919", + "scripts": [], + "own_tokenizer": false }, { "name": "Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Meitei", "iso_1_code": null, "iso_3_code": "mni", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8938", + "scripts": [ + "Latn", + "Beng", + "Mtei" + ], + "own_tokenizer": false }, { "name": "Angami-Pochuri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Mao", "iso_1_code": null, "iso_3_code": "nbi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8940", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Angami", "iso_1_code": null, "iso_3_code": "njm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8941", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Khezha", "iso_1_code": null, "iso_3_code": "nkh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8942", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Northern Rengma", "iso_1_code": null, "iso_3_code": "nnl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8943", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Pochuri", "iso_1_code": null, "iso_3_code": "npo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8944", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Southern Rengma", "iso_1_code": null, "iso_3_code": "nre", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8945", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Chokri", "iso_1_code": null, "iso_3_code": "nri", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8946", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Sumi", "iso_1_code": null, "iso_3_code": "nsm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8947", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Poumai", "iso_1_code": null, "iso_3_code": "pmx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8948", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8939", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Lotha", "iso_1_code": null, "iso_3_code": "njh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8950", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Ao", "iso_1_code": null, "iso_3_code": "njo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8951", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Sangtam", "iso_1_code": null, "iso_3_code": "nsa", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8952", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Yimchungru", "iso_1_code": null, "iso_3_code": "yim", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8953", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8949", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Digarish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Idu-Mishmi", "iso_1_code": null, "iso_3_code": "clk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8956", + "scripts": [], + "own_tokenizer": false }, { "name": "Digaro-Mishmi", "iso_1_code": null, "iso_3_code": "mhu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8957", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8955", + "scripts": [], + "own_tokenizer": false }, { "name": "Hrusish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hruso", "iso_1_code": null, "iso_3_code": "hru", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8959", + "scripts": [], + "own_tokenizer": false }, { "name": "Miji", "iso_1_code": null, "iso_3_code": "sjl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8960", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8958", + "scripts": [], + "own_tokenizer": false }, { "name": "Keman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Zakhring", "iso_1_code": null, "iso_3_code": "zkr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8962", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8961", + "scripts": [], + "own_tokenizer": false }, { "name": "Kho-Bwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bugun", "iso_1_code": null, "iso_3_code": "bgg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8964", + "scripts": [], + "own_tokenizer": false }, { "name": "Chug", "iso_1_code": null, "iso_3_code": "cvg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8965", + "scripts": [], + "own_tokenizer": false }, { "name": "Lish", "iso_1_code": null, "iso_3_code": "lsh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8966", + "scripts": [], + "own_tokenizer": false }, { "name": "Sartang", "iso_1_code": null, "iso_3_code": "onp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8967", + "scripts": [], + "own_tokenizer": false }, { "name": "Sherdukpen", "iso_1_code": null, "iso_3_code": "sdp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8968", + "scripts": [], + "own_tokenizer": false }, { "name": "Puroik", "iso_1_code": null, "iso_3_code": "suv", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8969", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8963", + "scripts": [], + "own_tokenizer": false }, { "name": "Lepcha", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lepcha", "iso_1_code": null, "iso_3_code": "lep", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8971", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8970", + "scripts": [], + "own_tokenizer": false }, { "name": "Mijish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Miju-Mishmi", "iso_1_code": null, "iso_3_code": "mxj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8973", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8972", + "scripts": [], + "own_tokenizer": false }, { "name": "Nungish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Drung", "iso_1_code": null, "iso_3_code": "duu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8975", + "scripts": [], + "own_tokenizer": false }, { "name": "Anong", "iso_1_code": null, "iso_3_code": "nun", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8976", + "scripts": [], + "own_tokenizer": false }, { "name": "Rawang", "iso_1_code": null, "iso_3_code": "raw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8977", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8974", + "scripts": [], + "own_tokenizer": false }, { "name": "Tani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Adi", "iso_1_code": null, "iso_3_code": "adi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8979", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Adi, Galo", "iso_1_code": null, "iso_3_code": "adl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8980", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Apatani", "iso_1_code": null, "iso_3_code": "apt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8981", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mising", "iso_1_code": null, "iso_3_code": "mrg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8982", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Na", "iso_1_code": null, "iso_3_code": "nbt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8983", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyishi", "iso_1_code": null, "iso_3_code": "njz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8984", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tagin", "iso_1_code": null, "iso_3_code": "tgj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8985", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8978", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8954", + "scripts": [], + "own_tokenizer": false }, { "name": "Karbi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Amri Karbi", "iso_1_code": null, "iso_3_code": "ajz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8987", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Karbi", "iso_1_code": null, "iso_3_code": "mjw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8988", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8986", + "scripts": [], + "own_tokenizer": false }, { "name": "Karenic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Karen, Bwe", "iso_1_code": null, "iso_3_code": "bwe", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8991", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayah, Eastern", "iso_1_code": null, "iso_3_code": "eky", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8992", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Geko", "iso_1_code": null, "iso_3_code": "ghk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8993", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayaw", "iso_1_code": null, "iso_3_code": "kvl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8994", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Geba", "iso_1_code": null, "iso_3_code": "kvq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8995", + "scripts": [], + "own_tokenizer": false }, { "name": "Kawyaw", "iso_1_code": null, "iso_3_code": "kxf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8996", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayah, Western", "iso_1_code": null, "iso_3_code": "kyu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "8997", + "scripts": [ + "Latn", + "Kali", + "Mymr" + ], + "own_tokenizer": false } - ] + ], + "node_i": "8990", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Lahta", "iso_1_code": null, "iso_3_code": "kvt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "8999", + "scripts": [], + "own_tokenizer": false }, { "name": "Yinbaw", "iso_1_code": null, "iso_3_code": "kvu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9000", + "scripts": [], + "own_tokenizer": false }, { "name": "Yintale", "iso_1_code": null, "iso_3_code": "kvy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9001", + "scripts": [], + "own_tokenizer": false }, { "name": "Zayein", "iso_1_code": null, "iso_3_code": "kxk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9002", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayan", "iso_1_code": null, "iso_3_code": "pdu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9003", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8998", + "scripts": [], + "own_tokenizer": false }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Pa\u2019o", "iso_1_code": null, "iso_3_code": "blk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9005", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false }, { "name": "Karen, Pwo Eastern", "iso_1_code": null, "iso_3_code": "kjp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9006", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Phrae Pwo", "iso_1_code": null, "iso_3_code": "kjt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9007", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Pwo Western", "iso_1_code": null, "iso_3_code": "pwo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9008", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Pwo Northern", "iso_1_code": null, "iso_3_code": "pww", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9009", + "scripts": [ + "Thai" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9004", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Karen, Mobwa", "iso_1_code": null, "iso_3_code": "jkm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9011", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, Paku", "iso_1_code": null, "iso_3_code": "jkp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9012", + "scripts": [], + "own_tokenizer": false }, { "name": "Karen, S\u2019gaw", "iso_1_code": null, "iso_3_code": "ksw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9013", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false }, { "name": "Wewaw", "iso_1_code": null, "iso_3_code": "wea", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9014", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9010", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8989", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuki-Chin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Thaiphum", "iso_1_code": null, "iso_3_code": "cth", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9016", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Ngawn", "iso_1_code": null, "iso_3_code": "cnw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9018", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pangkhua", "iso_1_code": null, "iso_3_code": "pkh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9019", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Tawr", "iso_1_code": null, "iso_3_code": "tcp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9020", + "scripts": [], + "own_tokenizer": false }, { "name": "Lai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Bawm", "iso_1_code": null, "iso_3_code": "bgr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9022", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Bualkhaw", "iso_1_code": null, "iso_3_code": "cbl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9023", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Falam", "iso_1_code": null, "iso_3_code": "cfm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9024", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Hakha", "iso_1_code": null, "iso_3_code": "cnh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9025", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9021", + "scripts": [], + "own_tokenizer": false }, { "name": "Mizo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Biate", "iso_1_code": null, "iso_3_code": "biu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9027", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hmar", "iso_1_code": null, "iso_3_code": "hmr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9028", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hrangkhol", "iso_1_code": null, "iso_3_code": "hra", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9029", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mizo", "iso_1_code": null, "iso_3_code": "lus", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9030", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sakachep", "iso_1_code": null, "iso_3_code": "sch", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9031", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9026", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9017", + "scripts": [], + "own_tokenizer": false }, { "name": "Maraic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Zotung", "iso_1_code": null, "iso_3_code": "czt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9033", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Senthang", "iso_1_code": null, "iso_3_code": "sez", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9034", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Zyphe", "iso_1_code": null, "iso_3_code": "zyp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9035", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Lautu", "iso_1_code": null, "iso_3_code": "clt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9037", + "scripts": [], + "own_tokenizer": false }, { "name": "Mara", "iso_1_code": null, "iso_3_code": "mrh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9038", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9036", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9032", + "scripts": [], + "own_tokenizer": false }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Aimol", "iso_1_code": null, "iso_3_code": "aim", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9040", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Anal", "iso_1_code": null, "iso_3_code": "anm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9041", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chiru", "iso_1_code": null, "iso_3_code": "cdf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9042", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Kharam", "iso_1_code": null, "iso_3_code": "kfw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9043", + "scripts": [], + "own_tokenizer": false }, { "name": "Kom", "iso_1_code": null, "iso_3_code": "kmm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9044", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lamkang", "iso_1_code": null, "iso_3_code": "lmk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9045", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Chothe", "iso_1_code": null, "iso_3_code": "nct", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9046", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Monsang", "iso_1_code": null, "iso_3_code": "nmh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9047", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Tarao", "iso_1_code": null, "iso_3_code": "tro", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9048", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9039", + "scripts": [], + "own_tokenizer": false }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Moyon", "iso_1_code": null, "iso_3_code": "nmo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9051", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Purum", "iso_1_code": null, "iso_3_code": "pub", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9052", + "scripts": [], + "own_tokenizer": false }, { "name": "Ralte", "iso_1_code": null, "iso_3_code": "ral", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9053", + "scripts": [], + "own_tokenizer": false }, { "name": "Simte", "iso_1_code": null, "iso_3_code": "smt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9054", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sizang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Siyin", "iso_1_code": null, "iso_3_code": "csy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9056", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gangte", "iso_1_code": null, "iso_3_code": "gnb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9057", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vaiphei", "iso_1_code": null, "iso_3_code": "vap", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9058", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zo", "iso_1_code": null, "iso_3_code": "zom", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9059", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9055", + "scripts": [], + "own_tokenizer": false }, { "name": "Thado", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Tedim", "iso_1_code": null, "iso_3_code": "ctd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9061", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Paite", "iso_1_code": null, "iso_3_code": "pck", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9062", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Thado", "iso_1_code": null, "iso_3_code": "tcz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9063", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9060", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9050", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Eastern Khumi", "iso_1_code": null, "iso_3_code": "cek", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9065", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mro-Khimi", "iso_1_code": null, "iso_3_code": "cmr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9066", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Khumi", "iso_1_code": null, "iso_3_code": "cnk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9067", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, Songlai", "iso_1_code": null, "iso_3_code": "csj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9068", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Sumtu", "iso_1_code": null, "iso_3_code": "csv", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9069", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Rungtu", "iso_1_code": null, "iso_3_code": "rtc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9070", + "scripts": [], + "own_tokenizer": false }, { "name": "Shendu", "iso_1_code": null, "iso_3_code": "shl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9071", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Rawngtu", "iso_1_code": null, "iso_3_code": "weu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9072", + "scripts": [], + "own_tokenizer": false }, { "name": "Cho-Asho", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Asho", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, L\u00e4okt\u00fc", "iso_1_code": null, "iso_3_code": "cey", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9075", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Laitu", "iso_1_code": null, "iso_3_code": "clj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9076", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Asho", "iso_1_code": null, "iso_3_code": "csh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9077", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9074", + "scripts": [], + "own_tokenizer": false }, { "name": "Cho", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Kaang", "iso_1_code": null, "iso_3_code": "ckn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9079", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Uppu", "iso_1_code": null, "iso_3_code": "cnb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9080", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Daai", "iso_1_code": null, "iso_3_code": "dao", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9081", + "scripts": [], + "own_tokenizer": false }, { "name": "Chin, Matu", "iso_1_code": null, "iso_3_code": "hlt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9082", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chin, M\u00fc\u00fcn", "iso_1_code": null, "iso_3_code": "mwq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9083", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9078", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9073", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9064", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9049", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9015", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngwi-Burmese", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Burmish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Achang", "iso_1_code": null, "iso_3_code": "acn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9087", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zaiwa", "iso_1_code": null, "iso_3_code": "atb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9088", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pela", "iso_1_code": null, "iso_3_code": "bxd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9089", + "scripts": [], + "own_tokenizer": false }, { "name": "Hpon", "iso_1_code": null, "iso_3_code": "hpo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9090", + "scripts": [], + "own_tokenizer": false }, { "name": "Lacid", "iso_1_code": null, "iso_3_code": "lsi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9091", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lhao Vo", "iso_1_code": null, "iso_3_code": "mhx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9092", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9086", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Danu", "iso_1_code": null, "iso_3_code": "dnv", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9094", + "scripts": [], + "own_tokenizer": false }, { "name": "Intha", "iso_1_code": null, "iso_3_code": "int", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9095", + "scripts": [], + "own_tokenizer": false }, { "name": "Burmese", "iso_1_code": "my", "iso_3_code": "mya", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9096", + "scripts": [ + "Mymr" + ], + "own_tokenizer": false }, { "name": "Rakhine", "iso_1_code": null, "iso_3_code": "rki", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9097", + "scripts": [], + "own_tokenizer": false }, { "name": "Marma", "iso_1_code": null, "iso_3_code": "rmz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9098", + "scripts": [], + "own_tokenizer": false }, { "name": "Taungyo", "iso_1_code": null, "iso_3_code": "tco", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9099", + "scripts": [], + "own_tokenizer": false }, { "name": "Tavoyan", "iso_1_code": null, "iso_3_code": "tvn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9100", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9093", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9085", + "scripts": [], + "own_tokenizer": false }, { "name": "Mru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chin, Anu-Khongso", "iso_1_code": null, "iso_3_code": "anl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9102", + "scripts": [], + "own_tokenizer": false }, { "name": "Mru", "iso_1_code": null, "iso_3_code": "mro", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9103", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9101", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngwi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Hlersu", "iso_1_code": null, "iso_3_code": "hle", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9106", + "scripts": [], + "own_tokenizer": false }, { "name": "Jinuo, Youle", "iso_1_code": null, "iso_3_code": "jiu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9107", + "scripts": [], + "own_tokenizer": false }, { "name": "Jinuo, Buyuan", "iso_1_code": null, "iso_3_code": "jiy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9108", + "scripts": [], + "own_tokenizer": false }, { "name": "Lahu Shi", "iso_1_code": null, "iso_3_code": "lhi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9109", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lahu", "iso_1_code": null, "iso_3_code": "lhu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9110", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lisu", "iso_1_code": null, "iso_3_code": "lis", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9111", + "scripts": [ + "Lisu" + ], + "own_tokenizer": false }, { "name": "Kucong", "iso_1_code": null, "iso_3_code": "lkc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9112", + "scripts": [], + "own_tokenizer": false }, { "name": "Lamu", "iso_1_code": null, "iso_3_code": "llh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9113", + "scripts": [], + "own_tokenizer": false }, { "name": "Lipo", "iso_1_code": null, "iso_3_code": "lpo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9114", + "scripts": [], + "own_tokenizer": false }, { "name": "Lawu", "iso_1_code": null, "iso_3_code": "lwu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9115", + "scripts": [], + "own_tokenizer": false }, { "name": "Nusu", "iso_1_code": null, "iso_3_code": "nuf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9116", + "scripts": [], + "own_tokenizer": false }, { "name": "Lolopo", "iso_1_code": null, "iso_3_code": "ycl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9117", + "scripts": [], + "own_tokenizer": false }, { "name": "Lalo, Dongshanba", "iso_1_code": null, "iso_3_code": "yik", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9118", + "scripts": [], + "own_tokenizer": false }, { "name": "Miqie", "iso_1_code": null, "iso_3_code": "yiq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9119", + "scripts": [], + "own_tokenizer": false }, { "name": "Lalu, Eastern", "iso_1_code": null, "iso_3_code": "yit", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9120", + "scripts": [], + "own_tokenizer": false }, { "name": "Limi", "iso_1_code": null, "iso_3_code": "ylm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9121", + "scripts": [], + "own_tokenizer": false }, { "name": "Mili", "iso_1_code": null, "iso_3_code": "ymh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9122", + "scripts": [], + "own_tokenizer": false }, { "name": "Lang\u2019e", "iso_1_code": null, "iso_3_code": "yne", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9123", + "scripts": [], + "own_tokenizer": false }, { "name": "Sani", "iso_1_code": null, "iso_3_code": "ysn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9124", + "scripts": [], + "own_tokenizer": false }, { "name": "Lolopo, Southern", "iso_1_code": null, "iso_3_code": "ysp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9125", + "scripts": [], + "own_tokenizer": false }, { "name": "Talu", "iso_1_code": null, "iso_3_code": "yta", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9126", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanglang", "iso_1_code": null, "iso_3_code": "ytl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9127", + "scripts": [], + "own_tokenizer": false }, { "name": "Lalu, Western", "iso_1_code": null, "iso_3_code": "ywl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9128", + "scripts": [], + "own_tokenizer": false }, { "name": "Lalo, Central", "iso_1_code": null, "iso_3_code": "ywt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9129", + "scripts": [], + "own_tokenizer": false }, { "name": "Zauzou", "iso_1_code": null, "iso_3_code": "zal", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9130", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9105", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Nuosu", "iso_1_code": "ii", "iso_3_code": "iii", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9132", + "scripts": [], + "own_tokenizer": false }, { "name": "Katso", "iso_1_code": null, "iso_3_code": "kaf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9133", + "scripts": [], + "own_tokenizer": false }, { "name": "Samei", "iso_1_code": null, "iso_3_code": "smh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9134", + "scripts": [], + "own_tokenizer": false }, { "name": "Chesu", "iso_1_code": null, "iso_3_code": "ych", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9135", + "scripts": [], + "own_tokenizer": false }, { "name": "Gepo", "iso_1_code": null, "iso_3_code": "ygp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9136", + "scripts": [], + "own_tokenizer": false }, { "name": "Nasu, Wusa", "iso_1_code": null, "iso_3_code": "yig", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9137", + "scripts": [], + "own_tokenizer": false }, { "name": "Awu", "iso_1_code": null, "iso_3_code": "yiu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9138", + "scripts": [], + "own_tokenizer": false }, { "name": "Naluo", "iso_1_code": null, "iso_3_code": "ylo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9139", + "scripts": [], + "own_tokenizer": false }, { "name": "Aluo", "iso_1_code": null, "iso_3_code": "yna", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9140", + "scripts": [], + "own_tokenizer": false }, { "name": "Samatao", "iso_1_code": null, "iso_3_code": "ysd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9141", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanie", "iso_1_code": null, "iso_3_code": "ysy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9142", + "scripts": [], + "own_tokenizer": false }, { "name": "Yi, Wuding-Luquan", "iso_1_code": null, "iso_3_code": "ywq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9143", + "scripts": [], + "own_tokenizer": false }, { "name": "Nasu, Wumeng", "iso_1_code": null, "iso_3_code": "ywu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9144", + "scripts": [], + "own_tokenizer": false }, { "name": "Ayizi", "iso_1_code": null, "iso_3_code": "yyz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9145", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9131", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Alugu", "iso_1_code": null, "iso_3_code": "aub", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9147", + "scripts": [], + "own_tokenizer": false }, { "name": "Azha", "iso_1_code": null, "iso_3_code": "aza", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9148", + "scripts": [], + "own_tokenizer": false }, { "name": "Laghuu", "iso_1_code": null, "iso_3_code": "lgh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9149", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisu, Eastern", "iso_1_code": null, "iso_3_code": "nos", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9150", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisu, Southern", "iso_1_code": null, "iso_3_code": "nsd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9151", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisu, Northwestern", "iso_1_code": null, "iso_3_code": "nsf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9152", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisu, Southwestern", "iso_1_code": null, "iso_3_code": "nsv", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9153", + "scripts": [], + "own_tokenizer": false }, { "name": "Mantsi", "iso_1_code": null, "iso_3_code": "nty", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9154", + "scripts": [], + "own_tokenizer": false }, { "name": "Phula", "iso_1_code": null, "iso_3_code": "phh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9155", + "scripts": [], + "own_tokenizer": false }, { "name": "Bokha", "iso_1_code": null, "iso_3_code": "ybk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9156", + "scripts": [], + "own_tokenizer": false }, { "name": "Phowa, Hlepho", "iso_1_code": null, "iso_3_code": "yhl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9157", + "scripts": [], + "own_tokenizer": false }, { "name": "Ache", "iso_1_code": null, "iso_3_code": "yif", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9158", + "scripts": [], + "own_tokenizer": false }, { "name": "Pholo", "iso_1_code": null, "iso_3_code": "yip", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9159", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisu, Northern", "iso_1_code": null, "iso_3_code": "yiv", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9160", + "scripts": [], + "own_tokenizer": false }, { "name": "Axi", "iso_1_code": null, "iso_3_code": "yix", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9161", + "scripts": [], + "own_tokenizer": false }, { "name": "Azhe", "iso_1_code": null, "iso_3_code": "yiz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9162", + "scripts": [], + "own_tokenizer": false }, { "name": "Khlula", "iso_1_code": null, "iso_3_code": "ykl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9163", + "scripts": [], + "own_tokenizer": false }, { "name": "Kua-nsi", "iso_1_code": null, "iso_3_code": "ykn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9164", + "scripts": [], + "own_tokenizer": false }, { "name": "Kathu", "iso_1_code": null, "iso_3_code": "ykt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9165", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuamasi", "iso_1_code": null, "iso_3_code": "yku", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9166", + "scripts": [], + "own_tokenizer": false }, { "name": "Muji, Southern", "iso_1_code": null, "iso_3_code": "ymc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9167", + "scripts": [], + "own_tokenizer": false }, { "name": "Moji", "iso_1_code": null, "iso_3_code": "ymi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9168", + "scripts": [], + "own_tokenizer": false }, { "name": "Muji, Qila", "iso_1_code": null, "iso_3_code": "ymq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9169", + "scripts": [], + "own_tokenizer": false }, { "name": "Muji, Northern", "iso_1_code": null, "iso_3_code": "ymx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9170", + "scripts": [], + "own_tokenizer": false }, { "name": "Muzi", "iso_1_code": null, "iso_3_code": "ymz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9171", + "scripts": [], + "own_tokenizer": false }, { "name": "Phala", "iso_1_code": null, "iso_3_code": "ypa", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9172", + "scripts": [], + "own_tokenizer": false }, { "name": "Phowa, Labo", "iso_1_code": null, "iso_3_code": "ypb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9173", + "scripts": [], + "own_tokenizer": false }, { "name": "Phola", "iso_1_code": null, "iso_3_code": "ypg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9174", + "scripts": [], + "own_tokenizer": false }, { "name": "Phupha", "iso_1_code": null, "iso_3_code": "yph", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9175", + "scripts": [], + "own_tokenizer": false }, { "name": "Phuma", "iso_1_code": null, "iso_3_code": "ypm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9176", + "scripts": [], + "own_tokenizer": false }, { "name": "Phowa, Ani", "iso_1_code": null, "iso_3_code": "ypn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9177", + "scripts": [], + "own_tokenizer": false }, { "name": "Phola, Alo", "iso_1_code": null, "iso_3_code": "ypo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9178", + "scripts": [], + "own_tokenizer": false }, { "name": "Phupa", "iso_1_code": null, "iso_3_code": "ypp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9179", + "scripts": [], + "own_tokenizer": false }, { "name": "Phuza", "iso_1_code": null, "iso_3_code": "ypz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9180", + "scripts": [], + "own_tokenizer": false }, { "name": "Sonaga", "iso_1_code": null, "iso_3_code": "ysg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9181", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisi", "iso_1_code": null, "iso_3_code": "yso", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9182", + "scripts": [], + "own_tokenizer": false }, { "name": "Thopho", "iso_1_code": null, "iso_3_code": "ytp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9183", + "scripts": [], + "own_tokenizer": false }, { "name": "Zokhuo", "iso_1_code": null, "iso_3_code": "yzk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9184", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9146", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Akeu", "iso_1_code": null, "iso_3_code": "aeu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9186", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Akha", "iso_1_code": null, "iso_3_code": "ahk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9187", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Biyo", "iso_1_code": null, "iso_3_code": "byo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9188", + "scripts": [], + "own_tokenizer": false }, { "name": "C\u00f4\u00f4ng", "iso_1_code": null, "iso_3_code": "cnc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9189", + "scripts": [], + "own_tokenizer": false }, { "name": "Enu", "iso_1_code": null, "iso_3_code": "enu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9190", + "scripts": [], + "own_tokenizer": false }, { "name": "Hani", "iso_1_code": null, "iso_3_code": "hni", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9191", + "scripts": [], + "own_tokenizer": false }, { "name": "Honi", "iso_1_code": null, "iso_3_code": "how", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9192", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaduo", "iso_1_code": null, "iso_3_code": "ktp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9193", + "scripts": [], + "own_tokenizer": false }, { "name": "Lopi", "iso_1_code": null, "iso_3_code": "lov", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9194", + "scripts": [], + "own_tokenizer": false }, { "name": "Mpi", "iso_1_code": null, "iso_3_code": "mpz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9195", + "scripts": [], + "own_tokenizer": false }, { "name": "Phana\u2019", "iso_1_code": null, "iso_3_code": "phq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9196", + "scripts": [], + "own_tokenizer": false }, { "name": "Sangkong", "iso_1_code": null, "iso_3_code": "sgk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9197", + "scripts": [], + "own_tokenizer": false }, { "name": "Sila", "iso_1_code": null, "iso_3_code": "slt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9198", + "scripts": [], + "own_tokenizer": false }, { "name": "Chepya", "iso_1_code": null, "iso_3_code": "ycp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9199", + "scripts": [], + "own_tokenizer": false }, { "name": "Muda", "iso_1_code": null, "iso_3_code": "ymd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9200", + "scripts": [], + "own_tokenizer": false }, { "name": "Bisoid", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bisu", "iso_1_code": null, "iso_3_code": "bzi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9202", + "scripts": [ + "Thai" + ], + "own_tokenizer": false }, { "name": "Laomian", "iso_1_code": null, "iso_3_code": "lwm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9203", + "scripts": [], + "own_tokenizer": false }, { "name": "Phunoi", "iso_1_code": null, "iso_3_code": "pho", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9204", + "scripts": [], + "own_tokenizer": false }, { "name": "Pyen", "iso_1_code": null, "iso_3_code": "pyy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9205", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9201", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9185", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Laopang", "iso_1_code": null, "iso_3_code": "lbg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9207", + "scripts": [], + "own_tokenizer": false }, { "name": "Ugong", "iso_1_code": null, "iso_3_code": "ugo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9208", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9206", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9104", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9084", + "scripts": [], + "own_tokenizer": false }, { "name": "Northeastern Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bai, Central", "iso_1_code": null, "iso_3_code": "bca", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9211", + "scripts": [], + "own_tokenizer": false }, { "name": "Bai, Panyi", "iso_1_code": null, "iso_3_code": "bfc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9212", + "scripts": [], + "own_tokenizer": false }, { "name": "Bai, Southern", "iso_1_code": null, "iso_3_code": "bfs", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9213", + "scripts": [], + "own_tokenizer": false }, { "name": "Bai, Lama", "iso_1_code": null, "iso_3_code": "lay", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9210", + "scripts": [], + "own_tokenizer": false }, { "name": "Baima", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baima", "iso_1_code": null, "iso_3_code": "bqh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9216", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9215", + "scripts": [], + "own_tokenizer": false }, { "name": "Ersuish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ersu", "iso_1_code": null, "iso_3_code": "ers", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9218", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9217", + "scripts": [], + "own_tokenizer": false }, { "name": "Naic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Namuyi", "iso_1_code": null, "iso_3_code": "nmy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9220", + "scripts": [], + "own_tokenizer": false }, { "name": "Narua", "iso_1_code": null, "iso_3_code": "nru", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9221", + "scripts": [], + "own_tokenizer": false }, { "name": "Naxi", "iso_1_code": null, "iso_3_code": "nxq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9222", + "scripts": [], + "own_tokenizer": false }, { "name": "Shuhi", "iso_1_code": null, "iso_3_code": "sxg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9223", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9219", + "scripts": [], + "own_tokenizer": false }, { "name": "Qiangic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Qiang, Northern", "iso_1_code": null, "iso_3_code": "cng", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9225", + "scripts": [], + "own_tokenizer": false }, { "name": "Minyag, Eastern", "iso_1_code": null, "iso_3_code": "emq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9226", + "scripts": [], + "own_tokenizer": false }, { "name": "Guiqiong", "iso_1_code": null, "iso_3_code": "gqi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9227", + "scripts": [], + "own_tokenizer": false }, { "name": "Pumi, Northern", "iso_1_code": null, "iso_3_code": "pmi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9228", + "scripts": [], + "own_tokenizer": false }, { "name": "Pumi, Southern", "iso_1_code": null, "iso_3_code": "pmj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9229", + "scripts": [], + "own_tokenizer": false }, { "name": "Queyu", "iso_1_code": null, "iso_3_code": "qvy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9230", + "scripts": [], + "own_tokenizer": false }, { "name": "Qiang, Southern", "iso_1_code": null, "iso_3_code": "qxs", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9231", + "scripts": [], + "own_tokenizer": false }, { "name": "Minyag, Western", "iso_1_code": null, "iso_3_code": "wmg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9232", + "scripts": [], + "own_tokenizer": false }, { "name": "Zhaba", "iso_1_code": null, "iso_3_code": "zhb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9233", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9224", + "scripts": [], + "own_tokenizer": false }, { "name": "rGyalrongic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Horpa", "iso_1_code": null, "iso_3_code": "ero", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9235", + "scripts": [], + "own_tokenizer": false }, { "name": "sTodsde", "iso_1_code": null, "iso_3_code": "jih", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9236", + "scripts": [], + "own_tokenizer": false }, { "name": "Lavrung", "iso_1_code": null, "iso_3_code": "jiq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9237", + "scripts": [], + "own_tokenizer": false }, { "name": "Jiarong", "iso_1_code": null, "iso_3_code": "jya", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9238", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9234", + "scripts": [], + "own_tokenizer": false }, { "name": "Tujia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tujia, Northern", "iso_1_code": null, "iso_3_code": "tji", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9240", + "scripts": [], + "own_tokenizer": false }, { "name": "Tujia, Southern", "iso_1_code": null, "iso_3_code": "tjs", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9241", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9239", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9209", + "scripts": [], + "own_tokenizer": false }, { "name": "Sal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Boro-Garo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Deori", "iso_1_code": null, "iso_3_code": "der", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9244", + "scripts": [], + "own_tokenizer": false }, { "name": "Garo", "iso_1_code": null, "iso_3_code": "grt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9245", + "scripts": [ + "Beng" + ], + "own_tokenizer": false }, { "name": "Megam", "iso_1_code": null, "iso_3_code": "mef", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9246", + "scripts": [], + "own_tokenizer": false }, { "name": "Tippera", "iso_1_code": null, "iso_3_code": "tpe", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9247", + "scripts": [], + "own_tokenizer": false }, { "name": "Boro-Tiwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tiwa", "iso_1_code": null, "iso_3_code": "lax", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9249", + "scripts": [], + "own_tokenizer": false }, { "name": "Boro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Boro", "iso_1_code": null, "iso_3_code": "brx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9251", + "scripts": [ + "Latn", + "Deva" + ], + "own_tokenizer": false }, { "name": "Kachari", "iso_1_code": null, "iso_3_code": "xac", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9250", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9248", + "scripts": [], + "own_tokenizer": false }, { "name": "Dimasa-Kokborok", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dimasa", "iso_1_code": null, "iso_3_code": "dis", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9254", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kok Borok", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Riang", "iso_1_code": null, "iso_3_code": "ria", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9256", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kok Borok", "iso_1_code": null, "iso_3_code": "trp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9257", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Usoi", "iso_1_code": null, "iso_3_code": "usi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9258", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9255", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9253", + "scripts": [], + "own_tokenizer": false }, { "name": "Koch", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Atong", "iso_1_code": null, "iso_3_code": "aot", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9260", + "scripts": [], + "own_tokenizer": false }, { "name": "Koch", "iso_1_code": null, "iso_3_code": "kdq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9261", + "scripts": [], + "own_tokenizer": false }, { "name": "Rabha", "iso_1_code": null, "iso_3_code": "rah", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9262", + "scripts": [], + "own_tokenizer": false }, { "name": "Ruga", "iso_1_code": null, "iso_3_code": "ruh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9263", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9259", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Khiamniungan", "iso_1_code": null, "iso_3_code": "kix", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9265", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Lainong", "iso_1_code": null, "iso_3_code": "lzn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9266", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Chang", "iso_1_code": null, "iso_3_code": "nbc", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9267", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Konyak", "iso_1_code": null, "iso_3_code": "nbe", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9268", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Nocte", "iso_1_code": null, "iso_3_code": "njb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9269", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Lao", "iso_1_code": null, "iso_3_code": "nlq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9270", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Wancho", "iso_1_code": null, "iso_3_code": "nnp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9271", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Ponyo-Gongwang", "iso_1_code": null, "iso_3_code": "npg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9272", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Phom", "iso_1_code": null, "iso_3_code": "nph", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9273", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Chen-Kayu", "iso_1_code": null, "iso_3_code": "nqq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9274", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Tangshang", "iso_1_code": null, "iso_3_code": "nst", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9275", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Tutsa", "iso_1_code": null, "iso_3_code": "tvt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9276", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Paungnyuan", "iso_1_code": null, "iso_3_code": "umn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9277", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9264", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9243", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhimalish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dhimal", "iso_1_code": null, "iso_3_code": "dhi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9279", + "scripts": [], + "own_tokenizer": false }, { "name": "Toto", "iso_1_code": null, "iso_3_code": "txo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9280", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9278", + "scripts": [], + "own_tokenizer": false }, { "name": "Jingppaw-Asakia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Asakian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chak", "iso_1_code": null, "iso_3_code": "ckh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9283", + "scripts": [], + "own_tokenizer": false }, { "name": "Kadu", "iso_1_code": null, "iso_3_code": "zkd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9284", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanan", "iso_1_code": null, "iso_3_code": "zkn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9285", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9282", + "scripts": [], + "own_tokenizer": false }, { "name": "Jingphaw", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Jingpho", "iso_1_code": null, "iso_3_code": "kac", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9287", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Singpho", "iso_1_code": null, "iso_3_code": "sgp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9288", + "scripts": [], + "own_tokenizer": false }, { "name": "Taman", "iso_1_code": null, "iso_3_code": "tcl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9289", + "scripts": [], + "own_tokenizer": false }, { "name": "Turung", "iso_1_code": null, "iso_3_code": "try", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9290", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9286", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9281", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9242", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangkhulic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Khoibu", "iso_1_code": null, "iso_3_code": "nkb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9292", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Tangkhul", "iso_1_code": null, "iso_3_code": "nmf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9293", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Maring", "iso_1_code": null, "iso_3_code": "nng", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9294", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Tangkhul", "iso_1_code": null, "iso_3_code": "ntx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9291", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Darlong", "iso_1_code": null, "iso_3_code": "dln", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9297", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "jkr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9298", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Makuri", "iso_1_code": null, "iso_3_code": "jmn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9299", + "scripts": [], + "own_tokenizer": false }, { "name": "Khamba", "iso_1_code": null, "iso_3_code": "kbg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9300", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Long Phuri", "iso_1_code": null, "iso_3_code": "lpn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9301", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Inpui", "iso_1_code": null, "iso_3_code": "nkf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9302", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Puimei", "iso_1_code": null, "iso_3_code": "npu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9303", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Akyaung Ari", "iso_1_code": null, "iso_3_code": "nqy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9304", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Kokak", "iso_1_code": null, "iso_3_code": "nxk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9305", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Jejara", "iso_1_code": null, "iso_3_code": "pzn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9306", + "scripts": [], + "own_tokenizer": false }, { "name": "Ranglong", "iso_1_code": null, "iso_3_code": "rnl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9307", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9296", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naga, Rongmei", "iso_1_code": null, "iso_3_code": "nbu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9309", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Liangmai", "iso_1_code": null, "iso_3_code": "njn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9310", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Koireng", "iso_1_code": null, "iso_3_code": "nkd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9311", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Thangal", "iso_1_code": null, "iso_3_code": "nki", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9312", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Maram", "iso_1_code": null, "iso_3_code": "nma", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9313", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Naga, Mzieme", "iso_1_code": null, "iso_3_code": "nme", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9314", + "scripts": [], + "own_tokenizer": false }, { "name": "Naga, Zeme", "iso_1_code": null, "iso_3_code": "nzm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "9315", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9308", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gongduk", "iso_1_code": null, "iso_3_code": "goe", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9318", + "scripts": [], + "own_tokenizer": false }, { "name": "Monpa, Kalaktang", "iso_1_code": null, "iso_3_code": "kkf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9319", + "scripts": [], + "own_tokenizer": false }, { "name": "Lhokpu", "iso_1_code": null, "iso_3_code": "lhp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9320", + "scripts": [], + "own_tokenizer": false }, { "name": "Olekha", "iso_1_code": null, "iso_3_code": "ole", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9321", + "scripts": [], + "own_tokenizer": false }, { "name": "Tshangla", "iso_1_code": null, "iso_3_code": "tsj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9322", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Choni", "iso_1_code": null, "iso_3_code": "cda", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9324", + "scripts": [], + "own_tokenizer": false }, { "name": "Tseku", "iso_1_code": null, "iso_3_code": "tsk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9325", + "scripts": [], + "own_tokenizer": false }, { "name": "Amdo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tibetan, Amdo", "iso_1_code": null, "iso_3_code": "adx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9327", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9326", + "scripts": [], + "own_tokenizer": false }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tibetan, Central", "iso_1_code": "bo", "iso_3_code": "bod", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9329", + "scripts": [ + "Tibt" + ], + "own_tokenizer": false }, { "name": "gTsang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dolpo", "iso_1_code": null, "iso_3_code": "dre", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9331", + "scripts": [], + "own_tokenizer": false }, { "name": "Gyalsumdo", "iso_1_code": null, "iso_3_code": "gyo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9332", + "scripts": [], + "own_tokenizer": false }, { "name": "Humla", "iso_1_code": null, "iso_3_code": "hut", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9333", + "scripts": [], + "own_tokenizer": false }, { "name": "Jirel", "iso_1_code": null, "iso_3_code": "jul", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9334", + "scripts": [], + "own_tokenizer": false }, { "name": "Kyerung", "iso_1_code": null, "iso_3_code": "kgy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9335", + "scripts": [], + "own_tokenizer": false }, { "name": "Nubri", "iso_1_code": null, "iso_3_code": "kte", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9336", + "scripts": [], + "own_tokenizer": false }, { "name": "Lhomi", "iso_1_code": null, "iso_3_code": "lhm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9337", + "scripts": [], + "own_tokenizer": false }, { "name": "Lhowa", "iso_1_code": null, "iso_3_code": "loy", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9338", + "scripts": [], + "own_tokenizer": false }, { "name": "Mugom-Karmarong", "iso_1_code": null, "iso_3_code": "muk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9339", + "scripts": [], + "own_tokenizer": false }, { "name": "Hyolmo", "iso_1_code": null, "iso_3_code": "scp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9340", + "scripts": [], + "own_tokenizer": false }, { "name": "Syuba", "iso_1_code": null, "iso_3_code": "syw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9341", + "scripts": [], + "own_tokenizer": false }, { "name": "Tichurong", "iso_1_code": null, "iso_3_code": "tcn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9342", + "scripts": [], + "own_tokenizer": false }, { "name": "Tsum", "iso_1_code": null, "iso_3_code": "ttz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9343", + "scripts": [], + "own_tokenizer": false }, { "name": "Sherpa", "iso_1_code": null, "iso_3_code": "xsr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9344", + "scripts": [ + "Deva" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9330", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Brokkat", "iso_1_code": null, "iso_3_code": "bro", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9346", + "scripts": [], + "own_tokenizer": false }, { "name": "Chocangacakha", "iso_1_code": null, "iso_3_code": "cgk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9347", + "scripts": [], + "own_tokenizer": false }, { "name": "Dzongkha", "iso_1_code": "dz", "iso_3_code": "dzo", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9348", + "scripts": [ + "Tibt" + ], + "own_tokenizer": false }, { "name": "Groma", "iso_1_code": null, "iso_3_code": "gro", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9349", + "scripts": [], + "own_tokenizer": false }, { "name": "Lakha", "iso_1_code": null, "iso_3_code": "lkh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9350", + "scripts": [], + "own_tokenizer": false }, { "name": "Lunanakha", "iso_1_code": null, "iso_3_code": "luk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9351", + "scripts": [], + "own_tokenizer": false }, { "name": "Layakha", "iso_1_code": null, "iso_3_code": "lya", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9352", + "scripts": [], + "own_tokenizer": false }, { "name": "Brokpake", "iso_1_code": null, "iso_3_code": "sgt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9353", + "scripts": [], + "own_tokenizer": false }, { "name": "Sikkimese", "iso_1_code": null, "iso_3_code": "sip", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9354", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9345", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Jad", "iso_1_code": null, "iso_3_code": "jda", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9356", + "scripts": [], + "own_tokenizer": false }, { "name": "Stod Bhoti", "iso_1_code": null, "iso_3_code": "sbu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9357", + "scripts": [], + "own_tokenizer": false }, { "name": "Spiti Bhoti", "iso_1_code": null, "iso_3_code": "spt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9358", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9355", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9328", + "scripts": [], + "own_tokenizer": false }, { "name": "Khams", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tibetan, Khams", "iso_1_code": null, "iso_3_code": "khg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9360", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9359", + "scripts": [], + "own_tokenizer": false }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Naaba", "iso_1_code": null, "iso_3_code": "nao", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9362", + "scripts": [], + "own_tokenizer": false }, { "name": "Walungge", "iso_1_code": null, "iso_3_code": "ola", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9363", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9361", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Balti", "iso_1_code": null, "iso_3_code": "bft", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9365", + "scripts": [], + "own_tokenizer": false }, { "name": "Changthang", "iso_1_code": null, "iso_3_code": "cna", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9366", + "scripts": [], + "own_tokenizer": false }, { "name": "Ladakhi", "iso_1_code": null, "iso_3_code": "lbj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9367", + "scripts": [ + "Tibt" + ], + "own_tokenizer": false }, { "name": "Purig", "iso_1_code": null, "iso_3_code": "prx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9368", + "scripts": [], + "own_tokenizer": false }, { "name": "Zangskari", "iso_1_code": null, "iso_3_code": "zau", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9369", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9364", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9323", + "scripts": [], + "own_tokenizer": false }, { "name": "East Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dakpakha", "iso_1_code": null, "iso_3_code": "dka", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9371", + "scripts": [], + "own_tokenizer": false }, { "name": "Monpa, Tawang", "iso_1_code": null, "iso_3_code": "twm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9372", + "scripts": [], + "own_tokenizer": false }, { "name": "Bumthang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dzalakha", "iso_1_code": null, "iso_3_code": "dzl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9374", + "scripts": [], + "own_tokenizer": false }, { "name": "Bumthangkha", "iso_1_code": null, "iso_3_code": "kjz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9375", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyenkha", "iso_1_code": null, "iso_3_code": "neh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9376", + "scripts": [], + "own_tokenizer": false }, { "name": "Nupbikha", "iso_1_code": null, "iso_3_code": "npb", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9377", + "scripts": [], + "own_tokenizer": false }, { "name": "Chalikha", "iso_1_code": null, "iso_3_code": "tgf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9378", + "scripts": [], + "own_tokenizer": false }, { "name": "Khengkha", "iso_1_code": null, "iso_3_code": "xkf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9379", + "scripts": [], + "own_tokenizer": false }, { "name": "Kurtokha", "iso_1_code": null, "iso_3_code": "xkz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9380", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9373", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9370", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Dura", "iso_1_code": null, "iso_3_code": "drq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9382", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaike", "iso_1_code": null, "iso_3_code": "kzq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9383", + "scripts": [], + "own_tokenizer": false }, { "name": "Ghale", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Ghale, Southern", "iso_1_code": null, "iso_3_code": "ghe", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9385", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Ghale, Northern", "iso_1_code": null, "iso_3_code": "ghh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9386", + "scripts": [], + "own_tokenizer": false }, { "name": "Kuke", "iso_1_code": null, "iso_3_code": "ght", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9387", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9384", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurung-Tamang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gurungic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chantyal", "iso_1_code": null, "iso_3_code": "chx", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9390", + "scripts": [], + "own_tokenizer": false }, { "name": "Gurung", "iso_1_code": null, "iso_3_code": "gvr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9391", + "scripts": [], + "own_tokenizer": false }, { "name": "Nyeshangte", "iso_1_code": null, "iso_3_code": "nmm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9392", + "scripts": [], + "own_tokenizer": false }, { "name": "Nar Phu", "iso_1_code": null, "iso_3_code": "npa", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9393", + "scripts": [], + "own_tokenizer": false }, { "name": "Seke", "iso_1_code": null, "iso_3_code": "skj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9394", + "scripts": [], + "own_tokenizer": false }, { "name": "Thakali", "iso_1_code": null, "iso_3_code": "ths", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9395", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9389", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Tamang, Eastern", "iso_1_code": null, "iso_3_code": "taj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9397", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Tamang, Western", "iso_1_code": null, "iso_3_code": "tdg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9398", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamang, Eastern Gorkha", "iso_1_code": null, "iso_3_code": "tge", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9399", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9396", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9381", + "scripts": [], + "own_tokenizer": false }, { "name": "West Himalayish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Almora", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Byangsi", "iso_1_code": null, "iso_3_code": "bee", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9402", + "scripts": [], + "own_tokenizer": false }, { "name": "Chaudangsi", "iso_1_code": null, "iso_3_code": "cdn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9403", + "scripts": [], + "own_tokenizer": false }, { "name": "Darmiya", "iso_1_code": null, "iso_3_code": "drd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9404", + "scripts": [], + "own_tokenizer": false }, { "name": "Rangkas", "iso_1_code": null, "iso_3_code": "rgk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9405", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9401", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinauri", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Gahri", "iso_1_code": null, "iso_3_code": "bfu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9407", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinnauri, Chitkuli", "iso_1_code": null, "iso_3_code": "cik", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9408", + "scripts": [], + "own_tokenizer": false }, { "name": "Jangshung", "iso_1_code": null, "iso_3_code": "jna", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9409", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinnauri", "iso_1_code": null, "iso_3_code": "kfk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9410", + "scripts": [], + "own_tokenizer": false }, { "name": "Pattani", "iso_1_code": null, "iso_3_code": "lae", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9411", + "scripts": [], + "own_tokenizer": false }, { "name": "Tinani", "iso_1_code": null, "iso_3_code": "lbf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9412", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinnauri, Bhoti", "iso_1_code": null, "iso_3_code": "nes", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9413", + "scripts": [], + "own_tokenizer": false }, { "name": "Rongpo", "iso_1_code": null, "iso_3_code": "rnp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9414", + "scripts": [], + "own_tokenizer": false }, { "name": "Shumcho", "iso_1_code": null, "iso_3_code": "scu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9415", + "scripts": [], + "own_tokenizer": false }, { "name": "Sunam", "iso_1_code": null, "iso_3_code": "ssk", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9416", + "scripts": [], + "own_tokenizer": false }, { "name": "Kinnauri, Chhoyul", "iso_1_code": null, "iso_3_code": "tpq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9417", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanashi", "iso_1_code": null, "iso_3_code": "xns", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9406", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9400", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9317", + "scripts": [], + "own_tokenizer": false }, { "name": "Himalayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Central Himalayan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chepang-Bhujel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bhujel", "iso_1_code": null, "iso_3_code": "byh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9422", + "scripts": [], + "own_tokenizer": false }, { "name": "Chepang", "iso_1_code": null, "iso_3_code": "cdm", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9423", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9421", + "scripts": [], + "own_tokenizer": false }, { "name": "Kham-Magar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kham", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Kham, Gamal", "iso_1_code": null, "iso_3_code": "kgj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9426", + "scripts": [], + "own_tokenizer": false }, { "name": "Kham, Eastern Parbate", "iso_1_code": null, "iso_3_code": "kif", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9427", + "scripts": [], + "own_tokenizer": false }, { "name": "Kham, Sheshi", "iso_1_code": null, "iso_3_code": "kip", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9428", + "scripts": [], + "own_tokenizer": false }, { "name": "Kham, Western Parbate", "iso_1_code": null, "iso_3_code": "kjl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9429", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9425", + "scripts": [], + "own_tokenizer": false }, { "name": "Magar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Magar, Eastern", "iso_1_code": null, "iso_3_code": "mgp", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9431", + "scripts": [], + "own_tokenizer": false }, { "name": "Magar, Western", "iso_1_code": null, "iso_3_code": "mrd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9432", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9430", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9424", + "scripts": [], + "own_tokenizer": false }, { "name": "Newar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Newar", "iso_1_code": null, "iso_3_code": "new", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9434", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Pahari", "iso_1_code": null, "iso_3_code": "phj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9435", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9433", + "scripts": [], + "own_tokenizer": false }, { "name": "Raute-Raji", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Rawat", "iso_1_code": null, "iso_3_code": "jnl", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9437", + "scripts": [], + "own_tokenizer": false }, { "name": "Raute", "iso_1_code": null, "iso_3_code": "rau", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9438", + "scripts": [], + "own_tokenizer": false }, { "name": "Raji", "iso_1_code": null, "iso_3_code": "rji", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9439", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9436", + "scripts": [], + "own_tokenizer": false }, { "name": "Thangmi-Baraamu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Baram", "iso_1_code": null, "iso_3_code": "brd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9441", + "scripts": [], + "own_tokenizer": false }, { "name": "Thangmi", "iso_1_code": null, "iso_3_code": "thf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9442", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9420", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiranti", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Athpariya", "iso_1_code": null, "iso_3_code": "aph", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9445", + "scripts": [], + "own_tokenizer": false }, { "name": "Bantawa", "iso_1_code": null, "iso_3_code": "bap", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9446", + "scripts": [], + "own_tokenizer": false }, { "name": "Belhariya", "iso_1_code": null, "iso_3_code": "byw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9447", + "scripts": [], + "own_tokenizer": false }, { "name": "Chhintang", "iso_1_code": null, "iso_3_code": "ctn", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9448", + "scripts": [], + "own_tokenizer": false }, { "name": "Chhiling", "iso_1_code": null, "iso_3_code": "cur", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9449", + "scripts": [], + "own_tokenizer": false }, { "name": "Chukwa", "iso_1_code": null, "iso_3_code": "cuw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9450", + "scripts": [], + "own_tokenizer": false }, { "name": "Mewahang, Eastern", "iso_1_code": null, "iso_3_code": "emg", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9451", + "scripts": [], + "own_tokenizer": false }, { "name": "Kulung", "iso_1_code": null, "iso_3_code": "kle", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9452", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Lohorung", "iso_1_code": null, "iso_3_code": "lbr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9453", + "scripts": [], + "own_tokenizer": false }, { "name": "Limbu", "iso_1_code": null, "iso_3_code": "lif", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9454", + "scripts": [ + "Deva", + "Limb" + ], + "own_tokenizer": false }, { "name": "Mugali", "iso_1_code": null, "iso_3_code": "lmh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9455", + "scripts": [], + "own_tokenizer": false }, { "name": "Yamphu, Southern", "iso_1_code": null, "iso_3_code": "lrr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9456", + "scripts": [], + "own_tokenizer": false }, { "name": "Yakkha, Chhathare", "iso_1_code": null, "iso_3_code": "luu", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9457", + "scripts": [], + "own_tokenizer": false }, { "name": "Nachiring", "iso_1_code": null, "iso_3_code": "ncd", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9458", + "scripts": [], + "own_tokenizer": false }, { "name": "Phangduwali", "iso_1_code": null, "iso_3_code": "phw", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9459", + "scripts": [], + "own_tokenizer": false }, { "name": "Puma", "iso_1_code": null, "iso_3_code": "pum", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9460", + "scripts": [], + "own_tokenizer": false }, { "name": "Dungmali", "iso_1_code": null, "iso_3_code": "raa", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9461", + "scripts": [], + "own_tokenizer": false }, { "name": "Chamling", "iso_1_code": null, "iso_3_code": "rab", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9462", + "scripts": [], + "own_tokenizer": false }, { "name": "Mewahang, Western", "iso_1_code": null, "iso_3_code": "raf", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9463", + "scripts": [], + "own_tokenizer": false }, { "name": "Saam", "iso_1_code": null, "iso_3_code": "raq", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9464", + "scripts": [], + "own_tokenizer": false }, { "name": "Sampang", "iso_1_code": null, "iso_3_code": "rav", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9465", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Yakkha", "iso_1_code": null, "iso_3_code": "ybh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9466", + "scripts": [], + "own_tokenizer": false }, { "name": "Yamphu", "iso_1_code": null, "iso_3_code": "ybi", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9467", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9444", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Hani": { + "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", + "original_lang_name": "chinese", + "original_lang_code": "zho", + "scripts": [ + "Latn", + "Hani" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Bahing", "iso_1_code": null, "iso_3_code": "bhj", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9469", + "scripts": [], + "own_tokenizer": false }, { "name": "Dumi", "iso_1_code": null, "iso_3_code": "dus", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9470", + "scripts": [], + "own_tokenizer": false }, { "name": "Jerung", "iso_1_code": null, "iso_3_code": "jee", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9471", + "scripts": [], + "own_tokenizer": false }, { "name": "Koyee", "iso_1_code": null, "iso_3_code": "kkt", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9472", + "scripts": [], + "own_tokenizer": false }, { "name": "Khaling", "iso_1_code": null, "iso_3_code": "klr", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9473", + "scripts": [], + "own_tokenizer": false }, { "name": "Sunwar", "iso_1_code": null, "iso_3_code": "suz", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9474", + "scripts": [ + "Deva" + ], + "own_tokenizer": false }, { "name": "Thulung", "iso_1_code": null, "iso_3_code": "tdh", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9475", + "scripts": [], + "own_tokenizer": false }, { "name": "Tilung", "iso_1_code": null, "iso_3_code": "tij", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9476", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayu", "iso_1_code": null, "iso_3_code": "vay", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9477", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambule", "iso_1_code": null, "iso_3_code": "wme", - "tokenizer": { - "name": "chinese", - "tokenizer": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9478", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9468", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9443", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9419", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9316", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8937", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "8918", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Siouan-Catawban.json b/data/Siouan-Catawban.json index e194f553fcb16abb885a5bce245c47f0b319f5d9..fea9ff276f4f2180072520665805e71f41e67519 100644 --- a/data/Siouan-Catawban.json +++ b/data/Siouan-Catawban.json @@ -2,232 +2,290 @@ "name": "Siouan-Catawban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Catawban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Catawba", "iso_1_code": null, "iso_3_code": "chc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9481", + "scripts": [], + "own_tokenizer": false }, { "name": "Woccon", "iso_1_code": null, "iso_3_code": "xwc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9482", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9480", + "scripts": [], + "own_tokenizer": false }, { "name": "Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mandan", "iso_1_code": null, "iso_3_code": "mhq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9484", + "scripts": [], + "own_tokenizer": false }, { "name": "Mississippi Valley-Ohio Valley Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Michigamea", "iso_1_code": null, "iso_3_code": "cmm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9486", + "scripts": [], + "own_tokenizer": false }, { "name": "Ofo", "iso_1_code": null, "iso_3_code": "ofo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9487", + "scripts": [], + "own_tokenizer": false }, { "name": "Tutelo", "iso_1_code": null, "iso_3_code": "tta", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9488", + "scripts": [], + "own_tokenizer": false }, { "name": "Mississippi Valley Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biloxi", "iso_1_code": null, "iso_3_code": "bll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9490", + "scripts": [], + "own_tokenizer": false }, { "name": "Chiwere-Winnebago", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Iowa-Oto", "iso_1_code": null, "iso_3_code": "iow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9492", + "scripts": [], + "own_tokenizer": false }, { "name": "Ho-Chunk", "iso_1_code": null, "iso_3_code": "win", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9493", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9491", + "scripts": [], + "own_tokenizer": false }, { "name": "Dakota", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Assiniboine", "iso_1_code": null, "iso_3_code": "asb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9495", + "scripts": [], + "own_tokenizer": false }, { "name": "Dakota", "iso_1_code": null, "iso_3_code": "dak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9496", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lakota", "iso_1_code": null, "iso_3_code": "lkt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9497", + "scripts": [], + "own_tokenizer": false }, { "name": "Stoney", "iso_1_code": null, "iso_3_code": "sto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9498", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9494", + "scripts": [], + "own_tokenizer": false }, { "name": "Dhegihan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kansa", "iso_1_code": null, "iso_3_code": "ksk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9500", + "scripts": [], + "own_tokenizer": false }, { "name": "Omaha-Ponca", "iso_1_code": null, "iso_3_code": "oma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9501", + "scripts": [], + "own_tokenizer": false }, { "name": "Osage", "iso_1_code": null, "iso_3_code": "osa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9502", + "scripts": [], + "own_tokenizer": false }, { "name": "Quapaw", "iso_1_code": null, "iso_3_code": "qua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9503", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9499", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9489", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9485", + "scripts": [], + "own_tokenizer": false }, { "name": "Missouri River Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Crow", "iso_1_code": null, "iso_3_code": "cro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9505", + "scripts": [], + "own_tokenizer": false }, { "name": "Hidatsa", "iso_1_code": null, "iso_3_code": "hid", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9506", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9504", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9483", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9479", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Skou.json b/data/Skou.json index bd48e92f74ab064d7c68007c7240da201f8f1e28..0449c2c0724438175e70f5b0ab0ab29c11e7932d 100644 --- a/data/Skou.json +++ b/data/Skou.json @@ -2,187 +2,231 @@ "name": "Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "I\u2019saka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "I\u2019saka", "iso_1_code": null, "iso_3_code": "ksi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9509", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9508", + "scripts": [], + "own_tokenizer": false }, { "name": "Skou-Serra-Lagoon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nuclear Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dumo", "iso_1_code": null, "iso_3_code": "vam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9513", + "scripts": [], + "own_tokenizer": false }, { "name": "Wutung", "iso_1_code": null, "iso_3_code": "wut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9514", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9512", + "scripts": [], + "own_tokenizer": false }, { "name": "Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Skou", "iso_1_code": null, "iso_3_code": "skv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9516", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9515", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9511", + "scripts": [], + "own_tokenizer": false }, { "name": "Serra Hills", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lagoon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bauni", "iso_1_code": null, "iso_3_code": "bpe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9519", + "scripts": [], + "own_tokenizer": false }, { "name": "Bouni", "iso_1_code": null, "iso_3_code": "suo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9520", + "scripts": [], + "own_tokenizer": false }, { "name": "Uni", "iso_1_code": null, "iso_3_code": "uni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9518", + "scripts": [], + "own_tokenizer": false }, { "name": "Puari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Puare", "iso_1_code": null, "iso_3_code": "pux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9523", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9522", + "scripts": [], + "own_tokenizer": false }, { "name": "Rawo-Main Serra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Main Serra", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pin", "iso_1_code": null, "iso_3_code": "wmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9526", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9525", + "scripts": [], + "own_tokenizer": false }, { "name": "Rawo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Rawo", "iso_1_code": null, "iso_3_code": "rwa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9528", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9527", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9524", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9517", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9510", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9507", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Somahai.json b/data/Somahai.json index 5f75642fe79242592fe56d6e838ebdd63b4846b3..a7e4e9e211e82fbea472bfb68436f84744563af4 100644 --- a/data/Somahai.json +++ b/data/Somahai.json @@ -2,24 +2,30 @@ "name": "Somahai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Momina", "iso_1_code": null, "iso_3_code": "mmb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9530", + "scripts": [], + "own_tokenizer": false }, { "name": "Momuna", "iso_1_code": null, "iso_3_code": "mqf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9531", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9529", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/South Bougainville.json b/data/South Bougainville.json index c50b74e4af3096098613be7627a9cbfe49ad97b8..b5011f69002aa74c18a23d2ba36179e1c88c60e9 100644 --- a/data/South Bougainville.json +++ b/data/South Bougainville.json @@ -2,98 +2,124 @@ "name": "South Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Buin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Terei", "iso_1_code": null, "iso_3_code": "buo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9564", + "scripts": [], + "own_tokenizer": false }, { "name": "Motuna", "iso_1_code": null, "iso_3_code": "siw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9565", + "scripts": [], + "own_tokenizer": false }, { "name": "Uisai", "iso_1_code": null, "iso_3_code": "uis", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9566", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9563", + "scripts": [], + "own_tokenizer": false }, { "name": "Nasioi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Koromira", "iso_1_code": null, "iso_3_code": "kqj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9568", + "scripts": [], + "own_tokenizer": false }, { "name": "Daantanai\u2019", "iso_1_code": null, "iso_3_code": "lni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9569", + "scripts": [], + "own_tokenizer": false }, { "name": "Naasioi", "iso_1_code": null, "iso_3_code": "nas", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9570", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sibe", "iso_1_code": null, "iso_3_code": "nco", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9571", + "scripts": [], + "own_tokenizer": false }, { "name": "Oune", "iso_1_code": null, "iso_3_code": "oue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9572", + "scripts": [], + "own_tokenizer": false }, { "name": "Simeku", "iso_1_code": null, "iso_3_code": "smz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9573", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9567", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9562", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/South-Central Papuan.json b/data/South-Central Papuan.json index 9174ef3b8322666792e6a3918134917564bd0e48..da6d8757535adcd6d20f128232f774bb3ab2f40f 100644 --- a/data/South-Central Papuan.json +++ b/data/South-Central Papuan.json @@ -2,247 +2,309 @@ "name": "South-Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Morehead-Upper Maro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nambu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Namo", "iso_1_code": null, "iso_3_code": "mxw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9535", + "scripts": [], + "own_tokenizer": false }, { "name": "Nambo", "iso_1_code": null, "iso_3_code": "ncm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9536", + "scripts": [], + "own_tokenizer": false }, { "name": "Neme", "iso_1_code": null, "iso_3_code": "nex", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9537", + "scripts": [], + "own_tokenizer": false }, { "name": "Namat", "iso_1_code": null, "iso_3_code": "nkm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9538", + "scripts": [], + "own_tokenizer": false }, { "name": "Nama", "iso_1_code": null, "iso_3_code": "nmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9539", + "scripts": [], + "own_tokenizer": false }, { "name": "Nen", "iso_1_code": null, "iso_3_code": "nqn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9540", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9534", + "scripts": [], + "own_tokenizer": false }, { "name": "Tonda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Blafe", "iso_1_code": null, "iso_3_code": "bfh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9542", + "scripts": [], + "own_tokenizer": false }, { "name": "Rema", "iso_1_code": null, "iso_3_code": "bow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9543", + "scripts": [], + "own_tokenizer": false }, { "name": "Wartha Thuntai", "iso_1_code": null, "iso_3_code": "gnt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9544", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanum, Ngk\u00e2lmpw", "iso_1_code": null, "iso_3_code": "kcd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9545", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanum, B\u00e4di", "iso_1_code": null, "iso_3_code": "khd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9546", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanum, Sota", "iso_1_code": null, "iso_3_code": "krz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9547", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanum, Sm\u00e4rky", "iso_1_code": null, "iso_3_code": "kxq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9548", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanja", "iso_1_code": null, "iso_3_code": "pep", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9549", + "scripts": [], + "own_tokenizer": false }, { "name": "Aramba", "iso_1_code": null, "iso_3_code": "stk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9550", + "scripts": [], + "own_tokenizer": false }, { "name": "W\u00e1ra", "iso_1_code": null, "iso_3_code": "tci", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9551", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9541", + "scripts": [], + "own_tokenizer": false }, { "name": "Yey", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yei", "iso_1_code": null, "iso_3_code": "jei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9553", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9552", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9533", + "scripts": [], + "own_tokenizer": false }, { "name": "Pahoturi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Idi", "iso_1_code": null, "iso_3_code": "idi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9555", + "scripts": [], + "own_tokenizer": false }, { "name": "Agob", "iso_1_code": null, "iso_3_code": "kit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9556", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9554", + "scripts": [], + "own_tokenizer": false }, { "name": "Waia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tabo", "iso_1_code": null, "iso_3_code": "knv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9558", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9557", + "scripts": [], + "own_tokenizer": false }, { "name": "Yelmek-Maklew", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yelmek", "iso_1_code": null, "iso_3_code": "jel", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9560", + "scripts": [], + "own_tokenizer": false }, { "name": "Maklew", "iso_1_code": null, "iso_3_code": "mgf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9561", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9559", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9532", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git "a/data/S\303\241livan.json" "b/data/S\303\241livan.json" index 57ff4a75dacbf35ae803915cd71557da7fceb9ac..2027a376d02a59748edbe2dd26f50cbe59a57564 100644 --- "a/data/S\303\241livan.json" +++ "b/data/S\303\241livan.json" @@ -2,41 +2,51 @@ "name": "S\u00e1livan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "S\u00e1liba", "iso_1_code": null, "iso_3_code": "slc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9575", + "scripts": [], + "own_tokenizer": false }, { "name": "Piaroa-Maco", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Piaroa", "iso_1_code": null, "iso_3_code": "pid", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9577", + "scripts": [], + "own_tokenizer": false }, { "name": "Maco", "iso_1_code": null, "iso_3_code": "wpc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9578", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9576", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9574", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tacanan.json b/data/Tacanan.json index bff120da2fe2e08a072e02fee3a40371a1ede693..79799ef410df9316106252524ebd9fd9609f7071 100644 --- a/data/Tacanan.json +++ b/data/Tacanan.json @@ -2,74 +2,98 @@ "name": "Tacanan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cavine\u00f1a", "iso_1_code": null, "iso_3_code": "cav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9580", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ese Ejja", "iso_1_code": null, "iso_3_code": "ese", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9582", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Toromono", "iso_1_code": null, "iso_3_code": "tno", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9583", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9581", + "scripts": [], + "own_tokenizer": false }, { "name": "Tacana", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Araona", "iso_1_code": null, "iso_3_code": "aro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9585", + "scripts": [], + "own_tokenizer": false }, { "name": "Reyesano", "iso_1_code": null, "iso_3_code": "rey", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9586", + "scripts": [], + "own_tokenizer": false }, { "name": "Tacana", "iso_1_code": null, "iso_3_code": "tna", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9587", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9584", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9579", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Takelman.json b/data/Takelman.json index ac13c0a0266825011965c22d547c9457608b3c41..a817b5f0446976176e28236e8e30e1ee9478182e 100644 --- a/data/Takelman.json +++ b/data/Takelman.json @@ -2,32 +2,40 @@ "name": "Takelman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalapuya", "iso_1_code": null, "iso_3_code": "kyl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9589", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Kalapuya", "iso_1_code": null, "iso_3_code": "nrt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9590", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Kalapuya", "iso_1_code": null, "iso_3_code": "sxk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9591", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9588", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tarascan.json b/data/Tarascan.json index 11fa1fbf118767583d5949b36ea6c86701d4ae69..f39a321ae5f936b7d0b14d82e1cc1a0911e20f40 100644 --- a/data/Tarascan.json +++ b/data/Tarascan.json @@ -2,24 +2,34 @@ "name": "Tarascan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Purepecha, Western Highland", "iso_1_code": null, "iso_3_code": "pua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9593", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Purepecha", "iso_1_code": null, "iso_3_code": "tsz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9594", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9592", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tequistlatecan.json b/data/Tequistlatecan.json index d9d3c4aff90f6ee1dc44aac27e71ead020c6c81e..38f70e1945ea8c412fa3710e73f52f08dd4b4b35 100644 --- a/data/Tequistlatecan.json +++ b/data/Tequistlatecan.json @@ -2,24 +2,32 @@ "name": "Tequistlatecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chontal, Highland Oaxaca", "iso_1_code": null, "iso_3_code": "chd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9596", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chontal, Lowland Oaxaca", "iso_1_code": null, "iso_3_code": "clo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9597", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9595", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tiniguan.json b/data/Tiniguan.json index 07f76c8113998ed96148379950a085537459cc62..1e5c144619d5aacc6cfa2b24c747ce21aff01bf0 100644 --- a/data/Tiniguan.json +++ b/data/Tiniguan.json @@ -2,16 +2,20 @@ "name": "Tiniguan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tinigua", "iso_1_code": null, "iso_3_code": "tit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9599", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9598", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tor-Kwerba.json b/data/Tor-Kwerba.json index caa6e871f0143da956ce14ee6d4af0ba0a775d00..c491e6dc1acf78bd585d70c16d3490034b1714fb 100644 --- a/data/Tor-Kwerba.json +++ b/data/Tor-Kwerba.json @@ -2,273 +2,343 @@ "name": "Tor-Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Greater Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Isirawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Isirawa", "iso_1_code": null, "iso_3_code": "srl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9603", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9602", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bagusa", "iso_1_code": null, "iso_3_code": "bqb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9606", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwerba", "iso_1_code": null, "iso_3_code": "kwe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9607", + "scripts": [], + "own_tokenizer": false }, { "name": "Trimuris", "iso_1_code": null, "iso_3_code": "tip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9608", + "scripts": [], + "own_tokenizer": false }, { "name": "Kauwera", "iso_1_code": null, "iso_3_code": "xau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9609", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwerba Mamberamo", "iso_1_code": null, "iso_3_code": "xwr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9610", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9605", + "scripts": [], + "own_tokenizer": false }, { "name": "West Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Airoran", "iso_1_code": null, "iso_3_code": "air", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9612", + "scripts": [], + "own_tokenizer": false }, { "name": "Samarokena", "iso_1_code": null, "iso_3_code": "tmj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9613", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9611", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9604", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9601", + "scripts": [], + "own_tokenizer": false }, { "name": "Orya-Tor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Orya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Orya", "iso_1_code": null, "iso_3_code": "ury", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9616", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9615", + "scripts": [], + "own_tokenizer": false }, { "name": "Sause", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sause", "iso_1_code": null, "iso_3_code": "sao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9618", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9617", + "scripts": [], + "own_tokenizer": false }, { "name": "Tor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Betaf", "iso_1_code": null, "iso_3_code": "bfe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9620", + "scripts": [], + "own_tokenizer": false }, { "name": "Berik", "iso_1_code": null, "iso_3_code": "bkl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9621", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Beneraf", "iso_1_code": null, "iso_3_code": "bnv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9622", + "scripts": [], + "own_tokenizer": false }, { "name": "Dabe", "iso_1_code": null, "iso_3_code": "dbe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9623", + "scripts": [], + "own_tokenizer": false }, { "name": "Itik", "iso_1_code": null, "iso_3_code": "itx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9624", + "scripts": [], + "own_tokenizer": false }, { "name": "Jofotek-Bromnya", "iso_1_code": null, "iso_3_code": "jbr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9625", + "scripts": [], + "own_tokenizer": false }, { "name": "Keijar", "iso_1_code": null, "iso_3_code": "kdy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9626", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwinsu", "iso_1_code": null, "iso_3_code": "kuc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9627", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwesten", "iso_1_code": null, "iso_3_code": "kwt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9628", + "scripts": [], + "own_tokenizer": false }, { "name": "Mander", "iso_1_code": null, "iso_3_code": "mqr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9629", + "scripts": [], + "own_tokenizer": false }, { "name": "Dineor", "iso_1_code": null, "iso_3_code": "mrx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9630", + "scripts": [], + "own_tokenizer": false }, { "name": "Vitou", "iso_1_code": null, "iso_3_code": "vto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9631", + "scripts": [], + "own_tokenizer": false }, { "name": "Wares", "iso_1_code": null, "iso_3_code": "wai", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9632", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9619", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9614", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9600", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Torricelli.json b/data/Torricelli.json index ede9ce4a3520e8c4549c70e64687079519a53cda..318a9a2f86481a8d16afde4ed3d906bfd330ca19 100644 --- a/data/Torricelli.json +++ b/data/Torricelli.json @@ -2,617 +2,783 @@ "name": "Torricelli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kombio-Arapesh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arapesh", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abu\u2019", "iso_1_code": null, "iso_3_code": "aah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9636", + "scripts": [], + "own_tokenizer": false }, { "name": "Mufian", "iso_1_code": null, "iso_3_code": "aoj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9637", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Weri", "iso_1_code": null, "iso_3_code": "aon", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9638", + "scripts": [], + "own_tokenizer": false }, { "name": "Bukiyip", "iso_1_code": null, "iso_3_code": "ape", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9639", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9635", + "scripts": [], + "own_tokenizer": false }, { "name": "Kombio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aruek", "iso_1_code": null, "iso_3_code": "aur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9641", + "scripts": [], + "own_tokenizer": false }, { "name": "Eitiep", "iso_1_code": null, "iso_3_code": "eit", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9642", + "scripts": [], + "own_tokenizer": false }, { "name": "Aro", "iso_1_code": null, "iso_3_code": "tei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9643", + "scripts": [], + "own_tokenizer": false }, { "name": "Wom", "iso_1_code": null, "iso_3_code": "wmo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9644", + "scripts": [], + "own_tokenizer": false }, { "name": "Kombio", "iso_1_code": null, "iso_3_code": "xbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9645", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yambes", "iso_1_code": null, "iso_3_code": "ymb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9646", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9640", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9634", + "scripts": [], + "own_tokenizer": false }, { "name": "Maimai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": "bey", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9649", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9648", + "scripts": [], + "own_tokenizer": false }, { "name": "Laeko-Libuat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Laeko-Libuat", "iso_1_code": null, "iso_3_code": "lkl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9651", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9650", + "scripts": [], + "own_tokenizer": false }, { "name": "Maimai Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Heiyoho", "iso_1_code": null, "iso_3_code": "auk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9653", + "scripts": [], + "own_tokenizer": false }, { "name": "Siliput", "iso_1_code": null, "iso_3_code": "mkc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9654", + "scripts": [], + "own_tokenizer": false }, { "name": "Yahang", "iso_1_code": null, "iso_3_code": "rhp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9655", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9652", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiaki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Minidien", "iso_1_code": null, "iso_3_code": "wii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9657", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9656", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9647", + "scripts": [], + "own_tokenizer": false }, { "name": "Marienberg", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bungain", "iso_1_code": null, "iso_3_code": "but", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9659", + "scripts": [], + "own_tokenizer": false }, { "name": "Buna", "iso_1_code": null, "iso_3_code": "bvn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9660", + "scripts": [], + "own_tokenizer": false }, { "name": "Elepi", "iso_1_code": null, "iso_3_code": "ele", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9661", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamasau", "iso_1_code": null, "iso_3_code": "kms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9662", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Juwar", "iso_1_code": null, "iso_3_code": "mwb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9663", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiarumus", "iso_1_code": null, "iso_3_code": "tua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9664", + "scripts": [], + "own_tokenizer": false }, { "name": "Urimo", "iso_1_code": null, "iso_3_code": "urx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9665", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9658", + "scripts": [], + "own_tokenizer": false }, { "name": "Monumbo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lilau", "iso_1_code": null, "iso_3_code": "lll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9667", + "scripts": [], + "own_tokenizer": false }, { "name": "Monumbo", "iso_1_code": null, "iso_3_code": "mxk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9668", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9666", + "scripts": [], + "own_tokenizer": false }, { "name": "Urim", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Urim", "iso_1_code": null, "iso_3_code": "uri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9670", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9669", + "scripts": [], + "own_tokenizer": false }, { "name": "Wapei-Palei", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Palei", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ambrak", "iso_1_code": null, "iso_3_code": "aag", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9673", + "scripts": [], + "own_tokenizer": false }, { "name": "Agi", "iso_1_code": null, "iso_3_code": "aif", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9674", + "scripts": [], + "own_tokenizer": false }, { "name": "Mol", "iso_1_code": null, "iso_3_code": "alx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9675", + "scripts": [], + "own_tokenizer": false }, { "name": "Bragat", "iso_1_code": null, "iso_3_code": "aof", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9676", + "scripts": [], + "own_tokenizer": false }, { "name": "Aruop", "iso_1_code": null, "iso_3_code": "lsr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9677", + "scripts": [], + "own_tokenizer": false }, { "name": "Nabi", "iso_1_code": null, "iso_3_code": "mty", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9678", + "scripts": [], + "own_tokenizer": false }, { "name": "Wanap", "iso_1_code": null, "iso_3_code": "wnp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9679", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangum Dey", "iso_1_code": null, "iso_3_code": "yde", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9680", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangum Gel", "iso_1_code": null, "iso_3_code": "ygl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9681", + "scripts": [], + "own_tokenizer": false }, { "name": "Yangum Mon", "iso_1_code": null, "iso_3_code": "ymo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9682", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9672", + "scripts": [], + "own_tokenizer": false }, { "name": "Urat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Urat", "iso_1_code": null, "iso_3_code": "urt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9684", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9683", + "scripts": [], + "own_tokenizer": false }, { "name": "Wapei", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Au", "iso_1_code": null, "iso_3_code": "avt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9686", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dia", "iso_1_code": null, "iso_3_code": "dia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9687", + "scripts": [], + "own_tokenizer": false }, { "name": "Elkei", "iso_1_code": null, "iso_3_code": "elk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9688", + "scripts": [], + "own_tokenizer": false }, { "name": "Gnau", "iso_1_code": null, "iso_3_code": "gnu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9689", + "scripts": [], + "own_tokenizer": false }, { "name": "Ningil", "iso_1_code": null, "iso_3_code": "niz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9690", + "scripts": [], + "own_tokenizer": false }, { "name": "Olo", "iso_1_code": null, "iso_3_code": "ong", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9691", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sinagen", "iso_1_code": null, "iso_3_code": "siu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9692", + "scripts": [], + "own_tokenizer": false }, { "name": "Walman", "iso_1_code": null, "iso_3_code": "van", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9693", + "scripts": [], + "own_tokenizer": false }, { "name": "Yeri", "iso_1_code": null, "iso_3_code": "yev", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9694", + "scripts": [], + "own_tokenizer": false }, { "name": "Yis", "iso_1_code": null, "iso_3_code": "yis", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9695", + "scripts": [], + "own_tokenizer": false }, { "name": "Yil", "iso_1_code": null, "iso_3_code": "yll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9696", + "scripts": [], + "own_tokenizer": false }, { "name": "Yau", "iso_1_code": null, "iso_3_code": "yyu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9697", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9685", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9671", + "scripts": [], + "own_tokenizer": false }, { "name": "West Wapei", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Seti", "iso_1_code": null, "iso_3_code": "sbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9699", + "scripts": [], + "own_tokenizer": false }, { "name": "Seta", "iso_1_code": null, "iso_3_code": "stf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9700", + "scripts": [], + "own_tokenizer": false }, { "name": "One", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "One, Molmo", "iso_1_code": null, "iso_3_code": "aun", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9702", + "scripts": [], + "own_tokenizer": false }, { "name": "One, Inebu", "iso_1_code": null, "iso_3_code": "oin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9703", + "scripts": [], + "own_tokenizer": false }, { "name": "One, Kwamtim", "iso_1_code": null, "iso_3_code": "okk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9704", + "scripts": [], + "own_tokenizer": false }, { "name": "One, Kabore", "iso_1_code": null, "iso_3_code": "onk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9705", + "scripts": [], + "own_tokenizer": false }, { "name": "One, Northern", "iso_1_code": null, "iso_3_code": "onr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9706", + "scripts": [], + "own_tokenizer": false }, { "name": "One, Southern", "iso_1_code": null, "iso_3_code": "osu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9707", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9701", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9698", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9633", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Totonacan.json b/data/Totonacan.json index ff8744c4ae471fa026e00d1d16ceb9b29a655f22..362302a8a90e8903c6f424a30f182a625e2bb922 100644 --- a/data/Totonacan.json +++ b/data/Totonacan.json @@ -2,122 +2,168 @@ "name": "Totonacan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tepehua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tepehua, Huehuetla", "iso_1_code": null, "iso_3_code": "tee", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9710", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tepehua, Pisaflores", "iso_1_code": null, "iso_3_code": "tpp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9711", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tepehua, Tlachichilco", "iso_1_code": null, "iso_3_code": "tpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9712", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9709", + "scripts": [], + "own_tokenizer": false }, { "name": "Totonac", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Totonac, Tecpatl\u00e1n", "iso_1_code": null, "iso_3_code": "tcw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9714", + "scripts": [], + "own_tokenizer": false }, { "name": "Totonac, Upper Necaxa", "iso_1_code": null, "iso_3_code": "tku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9715", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totonac, Yecuatla", "iso_1_code": null, "iso_3_code": "tlc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9716", + "scripts": [], + "own_tokenizer": false }, { "name": "Totonac, Filomena Mata-Coahuitl\u00e1n", "iso_1_code": null, "iso_3_code": "tlp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9717", + "scripts": [], + "own_tokenizer": false }, { "name": "Totonac, Coyutla", "iso_1_code": null, "iso_3_code": "toc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9718", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totonac, Xicotepec de Ju\u00e1rez", "iso_1_code": null, "iso_3_code": "too", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9719", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totonac, Papantla", "iso_1_code": null, "iso_3_code": "top", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9720", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totonac, Highland", "iso_1_code": null, "iso_3_code": "tos", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9721", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Totonaco del cerro Xinolat\u00e9petl", "iso_1_code": null, "iso_3_code": "tqt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9722", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9713", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9708", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Trans-New Guinea.json b/data/Trans-New Guinea.json index 66cd96361626c3b5a453c1b0009901badb60a975..66c84a2f49b0ec9405f736588f2fa45102d42ec0 100644 --- a/data/Trans-New Guinea.json +++ b/data/Trans-New Guinea.json @@ -2,5548 +2,7162 @@ "name": "Trans-New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angaatiha", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angaataha", "iso_1_code": null, "iso_3_code": "agm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9726", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9725", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Angan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ankave", "iso_1_code": null, "iso_3_code": "aak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9728", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tainae", "iso_1_code": null, "iso_3_code": "ago", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9729", + "scripts": [], + "own_tokenizer": false }, { "name": "Safeyoka", "iso_1_code": null, "iso_3_code": "apz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9730", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yipma", "iso_1_code": null, "iso_3_code": "byr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9731", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hamtai", "iso_1_code": null, "iso_3_code": "hmt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9732", + "scripts": [], + "own_tokenizer": false }, { "name": "Kawacha", "iso_1_code": null, "iso_3_code": "kcb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9733", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamasa", "iso_1_code": null, "iso_3_code": "klp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9734", + "scripts": [], + "own_tokenizer": false }, { "name": "Menya", "iso_1_code": null, "iso_3_code": "mcr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9735", + "scripts": [], + "own_tokenizer": false }, { "name": "Akoye", "iso_1_code": null, "iso_3_code": "miw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9736", + "scripts": [], + "own_tokenizer": false }, { "name": "Simbari", "iso_1_code": null, "iso_3_code": "smb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9737", + "scripts": [], + "own_tokenizer": false }, { "name": "Susuami", "iso_1_code": null, "iso_3_code": "ssu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9738", + "scripts": [], + "own_tokenizer": false }, { "name": "Yagwoia", "iso_1_code": null, "iso_3_code": "ygw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9739", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9727", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9724", + "scripts": [], + "own_tokenizer": false }, { "name": "Asmat-Kamoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asmat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asmat, Casuarina Coast", "iso_1_code": null, "iso_3_code": "asc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9742", + "scripts": [], + "own_tokenizer": false }, { "name": "Asmat, Yaosakor", "iso_1_code": null, "iso_3_code": "asy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9743", + "scripts": [], + "own_tokenizer": false }, { "name": "Asmat, Central", "iso_1_code": null, "iso_3_code": "cns", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9744", + "scripts": [], + "own_tokenizer": false }, { "name": "Asmat, North", "iso_1_code": null, "iso_3_code": "nks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9745", + "scripts": [], + "own_tokenizer": false }, { "name": "Citak, Tamnim", "iso_1_code": null, "iso_3_code": "tml", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9746", + "scripts": [], + "own_tokenizer": false }, { "name": "Citak", "iso_1_code": null, "iso_3_code": "txt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9747", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9741", + "scripts": [], + "own_tokenizer": false }, { "name": "Diuwe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Diuwe", "iso_1_code": null, "iso_3_code": "diy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9749", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9748", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamoro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kamoro", "iso_1_code": null, "iso_3_code": "kgq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9751", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9750", + "scripts": [], + "own_tokenizer": false }, { "name": "Sabakor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Buruwai", "iso_1_code": null, "iso_3_code": "asi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9753", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamberau", "iso_1_code": null, "iso_3_code": "irx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9752", + "scripts": [], + "own_tokenizer": false }, { "name": "Sempan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sempan", "iso_1_code": null, "iso_3_code": "xse", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9756", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9755", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9740", + "scripts": [], + "own_tokenizer": false }, { "name": "Awin-Pare", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aekyom", "iso_1_code": null, "iso_3_code": "awi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9758", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pa", "iso_1_code": null, "iso_3_code": "ppt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9759", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9757", + "scripts": [], + "own_tokenizer": false }, { "name": "Bosavi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eibela", "iso_1_code": null, "iso_3_code": "ail", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9761", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaluli", "iso_1_code": null, "iso_3_code": "bco", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9762", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bedamuni", "iso_1_code": null, "iso_3_code": "beo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9763", + "scripts": [], + "own_tokenizer": false }, { "name": "Dibiyaso", "iso_1_code": null, "iso_3_code": "dby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9764", + "scripts": [], + "own_tokenizer": false }, { "name": "Edolo", "iso_1_code": null, "iso_3_code": "etr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9765", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kasua", "iso_1_code": null, "iso_3_code": "khs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9766", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Onobasulu", "iso_1_code": null, "iso_3_code": "onn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9767", + "scripts": [], + "own_tokenizer": false }, { "name": "Sonia", "iso_1_code": null, "iso_3_code": "siq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9768", + "scripts": [], + "own_tokenizer": false }, { "name": "Turumsa", "iso_1_code": null, "iso_3_code": "tqm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9769", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9760", + "scripts": [], + "own_tokenizer": false }, { "name": "Chimbu-Wahgi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chimbu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Chuave", "iso_1_code": null, "iso_3_code": "cjv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9772", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Dom", "iso_1_code": null, "iso_3_code": "doa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9773", + "scripts": [], + "own_tokenizer": false }, { "name": "Golin", "iso_1_code": null, "iso_3_code": "gvf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9774", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": "kue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9775", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nomane", "iso_1_code": null, "iso_3_code": "nof", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9776", + "scripts": [], + "own_tokenizer": false }, { "name": "Yui", "iso_1_code": null, "iso_3_code": "sll", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9777", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sinasina", "iso_1_code": null, "iso_3_code": "sst", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9778", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9771", + "scripts": [], + "own_tokenizer": false }, { "name": "Hagen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaugel", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Imbongu", "iso_1_code": null, "iso_3_code": "imo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9781", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bo-Ung", "iso_1_code": null, "iso_3_code": "mux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9782", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Umbu-Ungu", "iso_1_code": null, "iso_3_code": "ubu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9783", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9780", + "scripts": [], + "own_tokenizer": false }, { "name": "Melpa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Melpa", "iso_1_code": null, "iso_3_code": "med", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9785", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9784", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9779", + "scripts": [], + "own_tokenizer": false }, { "name": "Jimi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kandawo", "iso_1_code": null, "iso_3_code": "gam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9787", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maring", "iso_1_code": null, "iso_3_code": "mbw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9788", + "scripts": [], + "own_tokenizer": false }, { "name": "Narak", "iso_1_code": null, "iso_3_code": "nac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9789", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9786", + "scripts": [], + "own_tokenizer": false }, { "name": "Wahgi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nii", "iso_1_code": null, "iso_3_code": "nii", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9791", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wahgi", "iso_1_code": null, "iso_3_code": "wgi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9792", + "scripts": [], + "own_tokenizer": false }, { "name": "Yuwei", "iso_1_code": null, "iso_3_code": "whg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9793", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9790", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9770", + "scripts": [], + "own_tokenizer": false }, { "name": "Damal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Damal", "iso_1_code": null, "iso_3_code": "uhn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9795", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9794", + "scripts": [], + "own_tokenizer": false }, { "name": "Dem", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dem", "iso_1_code": null, "iso_3_code": "dem", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9797", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9796", + "scripts": [], + "own_tokenizer": false }, { "name": "Duna-Bogaya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bogaya", "iso_1_code": null, "iso_3_code": "boq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9799", + "scripts": [], + "own_tokenizer": false }, { "name": "Duna", "iso_1_code": null, "iso_3_code": "duc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9800", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9798", + "scripts": [], + "own_tokenizer": false }, { "name": "East Kutubu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fiwaga", "iso_1_code": null, "iso_3_code": "fiw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9802", + "scripts": [], + "own_tokenizer": false }, { "name": "Foi", "iso_1_code": null, "iso_3_code": "foi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9803", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9801", + "scripts": [], + "own_tokenizer": false }, { "name": "East Strickland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fembe", "iso_1_code": null, "iso_3_code": "agl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9805", + "scripts": [], + "own_tokenizer": false }, { "name": "Gebusi", "iso_1_code": null, "iso_3_code": "goi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9806", + "scripts": [], + "own_tokenizer": false }, { "name": "Kubo", "iso_1_code": null, "iso_3_code": "jko", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9807", + "scripts": [], + "own_tokenizer": false }, { "name": "Odoodee", "iso_1_code": null, "iso_3_code": "kkc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9808", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Konai", "iso_1_code": null, "iso_3_code": "kxw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9809", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Samo", "iso_1_code": null, "iso_3_code": "smq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9810", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9804", + "scripts": [], + "own_tokenizer": false }, { "name": "Eleman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nuclear Eleman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Toaripi", "iso_1_code": null, "iso_3_code": "tqo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9814", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tairuma", "iso_1_code": null, "iso_3_code": "uar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9815", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9813", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Opao", "iso_1_code": null, "iso_3_code": "opo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9817", + "scripts": [], + "own_tokenizer": false }, { "name": "Orokolo", "iso_1_code": null, "iso_3_code": "oro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9818", + "scripts": [], + "own_tokenizer": false }, { "name": "Keoru-Ahia", "iso_1_code": null, "iso_3_code": "xeu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9819", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9816", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9812", + "scripts": [], + "own_tokenizer": false }, { "name": "Purari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Purari", "iso_1_code": null, "iso_3_code": "iar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9821", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9820", + "scripts": [], + "own_tokenizer": false }, { "name": "Tate", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kaki Ae", "iso_1_code": null, "iso_3_code": "tbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9823", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9822", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9811", + "scripts": [], + "own_tokenizer": false }, { "name": "Engan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angal-Kewa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Angal", "iso_1_code": null, "iso_3_code": "age", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9826", + "scripts": [], + "own_tokenizer": false }, { "name": "Angal Heneng", "iso_1_code": null, "iso_3_code": "akh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9827", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Angal Enen", "iso_1_code": null, "iso_3_code": "aoe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9828", + "scripts": [], + "own_tokenizer": false }, { "name": "Kewapi, West", "iso_1_code": null, "iso_3_code": "kew", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9829", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kewapi, East", "iso_1_code": null, "iso_3_code": "kjs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9830", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pole", "iso_1_code": null, "iso_3_code": "kjy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9831", + "scripts": [], + "own_tokenizer": false }, { "name": "Samberigi", "iso_1_code": null, "iso_3_code": "ssx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9832", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9825", + "scripts": [], + "own_tokenizer": false }, { "name": "Enga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bisorio", "iso_1_code": null, "iso_3_code": "bir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9834", + "scripts": [], + "own_tokenizer": false }, { "name": "Enga", "iso_1_code": null, "iso_3_code": "enq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9835", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ipili", "iso_1_code": null, "iso_3_code": "ipi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9836", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kyaka", "iso_1_code": null, "iso_3_code": "kyc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9837", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Lembena", "iso_1_code": null, "iso_3_code": "leq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9838", + "scripts": [], + "own_tokenizer": false }, { "name": "Nete", "iso_1_code": null, "iso_3_code": "net", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9839", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9833", + "scripts": [], + "own_tokenizer": false }, { "name": "Huli", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huli", "iso_1_code": null, "iso_3_code": "hui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9841", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9840", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9824", + "scripts": [], + "own_tokenizer": false }, { "name": "Finisterre-Huon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Finisterre", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Erap", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Finongan", "iso_1_code": null, "iso_3_code": "fag", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9845", + "scripts": [], + "own_tokenizer": false }, { "name": "Nema", "iso_1_code": null, "iso_3_code": "gsn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9846", + "scripts": [], + "own_tokenizer": false }, { "name": "Doloman", "iso_1_code": null, "iso_3_code": "mhf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9847", + "scripts": [], + "own_tokenizer": false }, { "name": "Mungkip", "iso_1_code": null, "iso_3_code": "mpv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9848", + "scripts": [], + "own_tokenizer": false }, { "name": "Nakame", "iso_1_code": null, "iso_3_code": "nib", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9849", + "scripts": [], + "own_tokenizer": false }, { "name": "Nek", "iso_1_code": null, "iso_3_code": "nif", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9850", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sama", "iso_1_code": null, "iso_3_code": "nis", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9851", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuk", "iso_1_code": null, "iso_3_code": "noc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9852", + "scripts": [], + "own_tokenizer": false }, { "name": "Numanggang", "iso_1_code": null, "iso_3_code": "nop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9853", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ma Manda", "iso_1_code": null, "iso_3_code": "skc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9854", + "scripts": [], + "own_tokenizer": false }, { "name": "Uri", "iso_1_code": null, "iso_3_code": "uvh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9855", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9844", + "scripts": [], + "own_tokenizer": false }, { "name": "Gusap-Mot", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Madi", "iso_1_code": null, "iso_3_code": "grg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9857", + "scripts": [], + "own_tokenizer": false }, { "name": "Iyo", "iso_1_code": null, "iso_3_code": "nca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9858", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Neko", "iso_1_code": null, "iso_3_code": "nej", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9859", + "scripts": [], + "own_tokenizer": false }, { "name": "Nekgini", "iso_1_code": null, "iso_3_code": "nkg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9860", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngaing", "iso_1_code": null, "iso_3_code": "nnf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9861", + "scripts": [], + "own_tokenizer": false }, { "name": "Rawa", "iso_1_code": null, "iso_3_code": "rwo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9862", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ufim", "iso_1_code": null, "iso_3_code": "ufi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9863", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9856", + "scripts": [], + "own_tokenizer": false }, { "name": "Uruwa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Weliki", "iso_1_code": null, "iso_3_code": "klh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9865", + "scripts": [], + "own_tokenizer": false }, { "name": "Nukna", "iso_1_code": null, "iso_3_code": "klt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9866", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kutong", "iso_1_code": null, "iso_3_code": "skm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9867", + "scripts": [], + "own_tokenizer": false }, { "name": "Tayatuk", "iso_1_code": null, "iso_3_code": "smc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9868", + "scripts": [], + "own_tokenizer": false }, { "name": "Yau", "iso_1_code": null, "iso_3_code": "yuw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9869", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9864", + "scripts": [], + "own_tokenizer": false }, { "name": "Wantoat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awara", "iso_1_code": null, "iso_3_code": "awx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9871", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tuma-Irumu", "iso_1_code": null, "iso_3_code": "iou", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9872", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wantoat", "iso_1_code": null, "iso_3_code": "wnc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9873", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9870", + "scripts": [], + "own_tokenizer": false }, { "name": "Warup", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Muratayak", "iso_1_code": null, "iso_3_code": "asx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9875", + "scripts": [], + "own_tokenizer": false }, { "name": "Gamane", "iso_1_code": null, "iso_3_code": "bmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9876", + "scripts": [], + "own_tokenizer": false }, { "name": "Gwahatike", "iso_1_code": null, "iso_3_code": "dah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9877", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Degenang", "iso_1_code": null, "iso_3_code": "dge", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9878", + "scripts": [], + "own_tokenizer": false }, { "name": "Forak", "iso_1_code": null, "iso_3_code": "frq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9879", + "scripts": [], + "own_tokenizer": false }, { "name": "Guya", "iso_1_code": null, "iso_3_code": "gka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9880", + "scripts": [], + "own_tokenizer": false }, { "name": "Asaro\u2019o", "iso_1_code": null, "iso_3_code": "mtv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9881", + "scripts": [], + "own_tokenizer": false }, { "name": "Tand\u0268", "iso_1_code": null, "iso_3_code": "ygm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9882", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9874", + "scripts": [], + "own_tokenizer": false }, { "name": "Yupna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bonkiman", "iso_1_code": null, "iso_3_code": "bop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9884", + "scripts": [], + "own_tokenizer": false }, { "name": "Domung", "iso_1_code": null, "iso_3_code": "dev", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9885", + "scripts": [], + "own_tokenizer": false }, { "name": "Ma", "iso_1_code": null, "iso_3_code": "mjn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9886", + "scripts": [], + "own_tokenizer": false }, { "name": "Nankina", "iso_1_code": null, "iso_3_code": "nnk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9887", + "scripts": [], + "own_tokenizer": false }, { "name": "Yout Wam", "iso_1_code": null, "iso_3_code": "ytw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9888", + "scripts": [], + "own_tokenizer": false }, { "name": "Yopno", "iso_1_code": null, "iso_3_code": "yut", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9889", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9883", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9843", + "scripts": [], + "own_tokenizer": false }, { "name": "Huon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dedua", "iso_1_code": null, "iso_3_code": "ded", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9892", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kube", "iso_1_code": null, "iso_3_code": "kgf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9893", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "K\u00e2te", "iso_1_code": null, "iso_3_code": "kmg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9894", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Borong", "iso_1_code": null, "iso_3_code": "ksr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9895", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mape", "iso_1_code": null, "iso_3_code": "mlh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9896", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Migabac", "iso_1_code": null, "iso_3_code": "mpp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9897", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Momare", "iso_1_code": null, "iso_3_code": "msz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9898", + "scripts": [], + "own_tokenizer": false }, { "name": "Sene", "iso_1_code": null, "iso_3_code": "sej", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9899", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9891", + "scripts": [], + "own_tokenizer": false }, { "name": "Kovai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kovai", "iso_1_code": null, "iso_3_code": "kqb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9901", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9900", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Somba-Siawari", "iso_1_code": null, "iso_3_code": "bmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9903", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kinalakna", "iso_1_code": null, "iso_3_code": "kco", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9904", + "scripts": [], + "own_tokenizer": false }, { "name": "Komba", "iso_1_code": null, "iso_3_code": "kpf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9905", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kumukio", "iso_1_code": null, "iso_3_code": "kuo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9906", + "scripts": [], + "own_tokenizer": false }, { "name": "Mesem", "iso_1_code": null, "iso_3_code": "mci", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9907", + "scripts": [], + "own_tokenizer": false }, { "name": "Nabak", "iso_1_code": null, "iso_3_code": "naf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9908", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nomu", "iso_1_code": null, "iso_3_code": "noh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9909", + "scripts": [], + "own_tokenizer": false }, { "name": "Ono", "iso_1_code": null, "iso_3_code": "ons", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9910", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sialum", "iso_1_code": null, "iso_3_code": "slw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9911", + "scripts": [], + "own_tokenizer": false }, { "name": "Selepet", "iso_1_code": null, "iso_3_code": "spl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9912", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tobo", "iso_1_code": null, "iso_3_code": "tbv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9913", + "scripts": [], + "own_tokenizer": false }, { "name": "Timbe", "iso_1_code": null, "iso_3_code": "tim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9914", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9902", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9890", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9842", + "scripts": [], + "own_tokenizer": false }, { "name": "Gogodala-Suki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gogodala", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ari", "iso_1_code": null, "iso_3_code": "aac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9917", + "scripts": [], + "own_tokenizer": false }, { "name": "Gogodala", "iso_1_code": null, "iso_3_code": "ggw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9918", + "scripts": [], + "own_tokenizer": false }, { "name": "Waruna", "iso_1_code": null, "iso_3_code": "wrv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9919", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9916", + "scripts": [], + "own_tokenizer": false }, { "name": "Suki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Suki", "iso_1_code": null, "iso_3_code": "sui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9921", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9920", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9915", + "scripts": [], + "own_tokenizer": false }, { "name": "Greater Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "North Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Suena", "iso_1_code": null, "iso_3_code": "sue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9925", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zia", "iso_1_code": null, "iso_3_code": "zia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9926", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9924", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Binandere", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Binandere", "iso_1_code": null, "iso_3_code": "bhg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9929", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9928", + "scripts": [], + "own_tokenizer": false }, { "name": "South Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Coastal Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baruga", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baruga", "iso_1_code": null, "iso_3_code": "bjz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9933", + "scripts": [], + "own_tokenizer": false }, { "name": "Doghoro", "iso_1_code": null, "iso_3_code": "dgx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9934", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9932", + "scripts": [], + "own_tokenizer": false }, { "name": "Gaena-Korafe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gaina", "iso_1_code": null, "iso_3_code": "gcn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9936", + "scripts": [], + "own_tokenizer": false }, { "name": "Korafe-Yegha", "iso_1_code": null, "iso_3_code": "kpr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9937", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9935", + "scripts": [], + "own_tokenizer": false }, { "name": "Notu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ewage-Notu", "iso_1_code": null, "iso_3_code": "nou", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9939", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9938", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9931", + "scripts": [], + "own_tokenizer": false }, { "name": "Orokaivan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aeka", "iso_1_code": null, "iso_3_code": "aez", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9941", + "scripts": [], + "own_tokenizer": false }, { "name": "Hunjara-Kaina Ke", "iso_1_code": null, "iso_3_code": "hkk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9942", + "scripts": [], + "own_tokenizer": false }, { "name": "Orokaiva", "iso_1_code": null, "iso_3_code": "okv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9943", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9940", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9930", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9927", + "scripts": [], + "own_tokenizer": false }, { "name": "Yekora", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yekora", "iso_1_code": null, "iso_3_code": "ykr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9945", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9944", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9923", + "scripts": [], + "own_tokenizer": false }, { "name": "Guhu-Samane", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guhu-Samane", "iso_1_code": null, "iso_3_code": "ghs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9947", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9946", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9922", + "scripts": [], + "own_tokenizer": false }, { "name": "Inland Gulf", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ipiko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ipiko", "iso_1_code": null, "iso_3_code": "ipo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9950", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9949", + "scripts": [], + "own_tokenizer": false }, { "name": "Minanibai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Foia Foia", "iso_1_code": null, "iso_3_code": "ffi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9952", + "scripts": [], + "own_tokenizer": false }, { "name": "Hoia Hoia", "iso_1_code": null, "iso_3_code": "hhi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9953", + "scripts": [], + "own_tokenizer": false }, { "name": "Hoyahoya", "iso_1_code": null, "iso_3_code": "hhy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9954", + "scripts": [], + "own_tokenizer": false }, { "name": "Minanibai", "iso_1_code": null, "iso_3_code": "mcv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9955", + "scripts": [], + "own_tokenizer": false }, { "name": "Mubami", "iso_1_code": null, "iso_3_code": "tsx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9956", + "scripts": [], + "own_tokenizer": false }, { "name": "Karami", "iso_1_code": null, "iso_3_code": "xar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9957", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9951", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9948", + "scripts": [], + "own_tokenizer": false }, { "name": "Kainantu-Goroka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gorokan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fore", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fore", "iso_1_code": null, "iso_3_code": "for", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9961", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gimi", "iso_1_code": null, "iso_3_code": "gim", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9962", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9960", + "scripts": [], + "own_tokenizer": false }, { "name": "Gahuku-Benabena", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dano", "iso_1_code": null, "iso_3_code": "aso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9964", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Benabena", "iso_1_code": null, "iso_3_code": "bef", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9965", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Alekano", "iso_1_code": null, "iso_3_code": "gah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9966", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tokano", "iso_1_code": null, "iso_3_code": "zuh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9967", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9963", + "scripts": [], + "own_tokenizer": false }, { "name": "Gende", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gende", "iso_1_code": null, "iso_3_code": "gaf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9969", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9968", + "scripts": [], + "own_tokenizer": false }, { "name": "Isabi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Isabi", "iso_1_code": null, "iso_3_code": "isa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9971", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9970", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamano-Yagaria", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wagama", "iso_1_code": null, "iso_3_code": "abg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9973", + "scripts": [], + "own_tokenizer": false }, { "name": "Inoke-Yate", "iso_1_code": null, "iso_3_code": "ino", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9974", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kamano", "iso_1_code": null, "iso_3_code": "kbq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9975", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kanite", "iso_1_code": null, "iso_3_code": "kmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9976", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Keyagana", "iso_1_code": null, "iso_3_code": "kyg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9977", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yagaria", "iso_1_code": null, "iso_3_code": "ygr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9978", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9972", + "scripts": [], + "own_tokenizer": false }, { "name": "Siane", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Siane", "iso_1_code": null, "iso_3_code": "snp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9980", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaweyuha", "iso_1_code": null, "iso_3_code": "yby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9981", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9979", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9959", + "scripts": [], + "own_tokenizer": false }, { "name": "Kainantu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aziana", "iso_1_code": null, "iso_3_code": "gat", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9983", + "scripts": [], + "own_tokenizer": false }, { "name": "Gadsup-Auyana-Awa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Agarabi", "iso_1_code": null, "iso_3_code": "agd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9985", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Awiyaana", "iso_1_code": null, "iso_3_code": "auy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9986", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "awb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9987", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gadsup", "iso_1_code": null, "iso_3_code": "gaj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9988", + "scripts": [], + "own_tokenizer": false }, { "name": "Kosena", "iso_1_code": null, "iso_3_code": "kze", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9989", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ontenu", "iso_1_code": null, "iso_3_code": "ont", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9990", + "scripts": [], + "own_tokenizer": false }, { "name": "Usarufa", "iso_1_code": null, "iso_3_code": "usa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9991", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9984", + "scripts": [], + "own_tokenizer": false }, { "name": "Kambaira", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asa\u2019a", "iso_1_code": null, "iso_3_code": "kyy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9993", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9992", + "scripts": [], + "own_tokenizer": false }, { "name": "Owenia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Owenia", "iso_1_code": null, "iso_3_code": "wsr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9995", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9994", + "scripts": [], + "own_tokenizer": false }, { "name": "Tairora", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Binumarien", "iso_1_code": null, "iso_3_code": "bjr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9997", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tairora, South", "iso_1_code": null, "iso_3_code": "omw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9998", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tairora, North", "iso_1_code": null, "iso_3_code": "tbg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "9999", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Waffa", "iso_1_code": null, "iso_3_code": "waj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10000", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "9996", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9982", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9958", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamula", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kamula", "iso_1_code": null, "iso_3_code": "xla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10002", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10001", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayagar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Atohwaim", "iso_1_code": null, "iso_3_code": "aqm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10004", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayagar", "iso_1_code": null, "iso_3_code": "kyt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10005", + "scripts": [], + "own_tokenizer": false }, { "name": "Tamagario", "iso_1_code": null, "iso_3_code": "tcg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10006", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10003", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiwaian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bamu", "iso_1_code": null, "iso_3_code": "bcf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10008", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiwai, Northeast", "iso_1_code": null, "iso_3_code": "kiw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10009", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiwai, Southern", "iso_1_code": null, "iso_3_code": "kjd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10010", + "scripts": [], + "own_tokenizer": false }, { "name": "Waboda", "iso_1_code": null, "iso_3_code": "kmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10011", + "scripts": [], + "own_tokenizer": false }, { "name": "Kerewo", "iso_1_code": null, "iso_3_code": "kxz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10012", + "scripts": [], + "own_tokenizer": false }, { "name": "Morigi", "iso_1_code": null, "iso_3_code": "mdb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10013", + "scripts": [], + "own_tokenizer": false }, { "name": "Kibiri", "iso_1_code": null, "iso_3_code": "prm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10014", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10007", + "scripts": [], + "own_tokenizer": false }, { "name": "Kolopom", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kimaghima", "iso_1_code": null, "iso_3_code": "kig", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10016", + "scripts": [], + "own_tokenizer": false }, { "name": "Ndom", "iso_1_code": null, "iso_3_code": "nqm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10017", + "scripts": [], + "own_tokenizer": false }, { "name": "Riantana", "iso_1_code": null, "iso_3_code": "ran", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10018", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10015", + "scripts": [], + "own_tokenizer": false }, { "name": "Madang", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Croisilles", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amaimon", "iso_1_code": null, "iso_3_code": "ali", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10021", + "scripts": [], + "own_tokenizer": false }, { "name": "Kare", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kare", "iso_1_code": null, "iso_3_code": "kmf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10023", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10022", + "scripts": [], + "own_tokenizer": false }, { "name": "Kokon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Girawa", "iso_1_code": null, "iso_3_code": "bbr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10025", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kein", "iso_1_code": null, "iso_3_code": "bmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10026", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Munit", "iso_1_code": null, "iso_3_code": "mtc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10027", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10024", + "scripts": [], + "own_tokenizer": false }, { "name": "Kowan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amako", "iso_1_code": null, "iso_3_code": "koz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10029", + "scripts": [], + "own_tokenizer": false }, { "name": "Waskia", "iso_1_code": null, "iso_3_code": "wsk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10030", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10028", + "scripts": [], + "own_tokenizer": false }, { "name": "Mabuso", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amele", "iso_1_code": null, "iso_3_code": "aey", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10033", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Fulumu", "iso_1_code": null, "iso_3_code": "bbd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10034", + "scripts": [], + "own_tokenizer": false }, { "name": "Gumalu", "iso_1_code": null, "iso_3_code": "gmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10035", + "scripts": [], + "own_tokenizer": false }, { "name": "Sihan", "iso_1_code": null, "iso_3_code": "snr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10036", + "scripts": [], + "own_tokenizer": false }, { "name": "Panim-Isebe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Isebe", "iso_1_code": null, "iso_3_code": "igo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10038", + "scripts": [], + "own_tokenizer": false }, { "name": "Panim", "iso_1_code": null, "iso_3_code": "pnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10039", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10037", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10032", + "scripts": [], + "own_tokenizer": false }, { "name": "Hanseman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baimak", "iso_1_code": null, "iso_3_code": "bmx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10041", + "scripts": [], + "own_tokenizer": false }, { "name": "Bagupi", "iso_1_code": null, "iso_3_code": "bpi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10042", + "scripts": [], + "own_tokenizer": false }, { "name": "Wagi", "iso_1_code": null, "iso_3_code": "fad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10043", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Gal", "iso_1_code": null, "iso_3_code": "gap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10044", + "scripts": [], + "own_tokenizer": false }, { "name": "Nobonob", "iso_1_code": null, "iso_3_code": "gaw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10045", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Garus", "iso_1_code": null, "iso_3_code": "gyb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10046", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawan", "iso_1_code": null, "iso_3_code": "mcz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10047", + "scripts": [], + "own_tokenizer": false }, { "name": "Matepi", "iso_1_code": null, "iso_3_code": "mqe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10048", + "scripts": [], + "own_tokenizer": false }, { "name": "Nake", "iso_1_code": null, "iso_3_code": "nbk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10049", + "scripts": [], + "own_tokenizer": false }, { "name": "Rempi", "iso_1_code": null, "iso_3_code": "rmp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10050", + "scripts": [], + "own_tokenizer": false }, { "name": "Rapting", "iso_1_code": null, "iso_3_code": "rpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10051", + "scripts": [], + "own_tokenizer": false }, { "name": "Saruga", "iso_1_code": null, "iso_3_code": "sra", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10052", + "scripts": [], + "own_tokenizer": false }, { "name": "Yoidik", "iso_1_code": null, "iso_3_code": "ydk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10053", + "scripts": [], + "own_tokenizer": false }, { "name": "Silopi-Utu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Utu", "iso_1_code": null, "iso_3_code": "utu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10055", + "scripts": [], + "own_tokenizer": false }, { "name": "Silopi", "iso_1_code": null, "iso_3_code": "xsp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10056", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10054", + "scripts": [], + "own_tokenizer": false }, { "name": "Wamas-Samosa-Murupi-Mosimo", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mosimo", "iso_1_code": null, "iso_3_code": "mqv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10058", + "scripts": [], + "own_tokenizer": false }, { "name": "Murupi", "iso_1_code": null, "iso_3_code": "mqw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10059", + "scripts": [], + "own_tokenizer": false }, { "name": "Samosa", "iso_1_code": null, "iso_3_code": "swm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10060", + "scripts": [], + "own_tokenizer": false }, { "name": "Wamas", "iso_1_code": null, "iso_3_code": "wmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10061", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10057", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10040", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10031", + "scripts": [], + "own_tokenizer": false }, { "name": "Mugil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bargam", "iso_1_code": null, "iso_3_code": "mlp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10063", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10062", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Adelbert", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gavak", "iso_1_code": null, "iso_3_code": "dmc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10065", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaukombar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mala", "iso_1_code": null, "iso_3_code": "ped", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10067", + "scripts": [], + "own_tokenizer": false }, { "name": "Miani", "iso_1_code": null, "iso_3_code": "pla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10068", + "scripts": [], + "own_tokenizer": false }, { "name": "Maia", "iso_1_code": null, "iso_3_code": "sks", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10069", + "scripts": [], + "own_tokenizer": false }, { "name": "Maiani", "iso_1_code": null, "iso_3_code": "tnh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10070", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10066", + "scripts": [], + "own_tokenizer": false }, { "name": "Kumil-Tibor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kumil", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bepour", "iso_1_code": null, "iso_3_code": "bie", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10073", + "scripts": [], + "own_tokenizer": false }, { "name": "Mauwake", "iso_1_code": null, "iso_3_code": "mhl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10074", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Moere", "iso_1_code": null, "iso_3_code": "mvq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10075", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10072", + "scripts": [], + "own_tokenizer": false }, { "name": "Tibor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pamosu", "iso_1_code": null, "iso_3_code": "hih", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10077", + "scripts": [], + "own_tokenizer": false }, { "name": "Mawak", "iso_1_code": null, "iso_3_code": "mjj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10078", + "scripts": [], + "own_tokenizer": false }, { "name": "Hember Avu", "iso_1_code": null, "iso_3_code": "mmi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10079", + "scripts": [], + "own_tokenizer": false }, { "name": "Mokati", "iso_1_code": null, "iso_3_code": "wnb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10080", + "scripts": [], + "own_tokenizer": false }, { "name": "Kowaki", "iso_1_code": null, "iso_3_code": "xow", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10081", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10076", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10071", + "scripts": [], + "own_tokenizer": false }, { "name": "Manep-Barem", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Brem", "iso_1_code": null, "iso_3_code": "buq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10083", + "scripts": [], + "own_tokenizer": false }, { "name": "Manep", "iso_1_code": null, "iso_3_code": "mkr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10084", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10082", + "scripts": [], + "own_tokenizer": false }, { "name": "Numugen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karian-Usan-Yaban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karian", "iso_1_code": null, "iso_3_code": "bql", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10087", + "scripts": [], + "own_tokenizer": false }, { "name": "Usan", "iso_1_code": null, "iso_3_code": "wnu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10088", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaben", "iso_1_code": null, "iso_3_code": "ybm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10089", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10086", + "scripts": [], + "own_tokenizer": false }, { "name": "Yarawata-Parawen-Ukuriguma", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Parawen", "iso_1_code": null, "iso_3_code": "prw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10091", + "scripts": [], + "own_tokenizer": false }, { "name": "Ukuriguma", "iso_1_code": null, "iso_3_code": "ukg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10092", + "scripts": [], + "own_tokenizer": false }, { "name": "Yarawata", "iso_1_code": null, "iso_3_code": "yrw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10093", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10090", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10085", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10064", + "scripts": [], + "own_tokenizer": false }, { "name": "Omosan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pal", "iso_1_code": null, "iso_3_code": "abw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10095", + "scripts": [], + "own_tokenizer": false }, { "name": "Kovol", "iso_1_code": null, "iso_3_code": "kgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10096", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10094", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10020", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalam-Kobon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kalam", "iso_1_code": null, "iso_3_code": "kmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10098", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kobon", "iso_1_code": null, "iso_3_code": "kpw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10099", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tay", "iso_1_code": null, "iso_3_code": "taw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10100", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10097", + "scripts": [], + "own_tokenizer": false }, { "name": "Rai Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wasembo", "iso_1_code": null, "iso_3_code": "gsp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10102", + "scripts": [], + "own_tokenizer": false }, { "name": "Biyom-Tauya", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biyom", "iso_1_code": null, "iso_3_code": "bpm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10104", + "scripts": [], + "own_tokenizer": false }, { "name": "Tauya", "iso_1_code": null, "iso_3_code": "tya", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10105", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10103", + "scripts": [], + "own_tokenizer": false }, { "name": "Evapia", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kou", "iso_1_code": null, "iso_3_code": "snz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10107", + "scripts": [], + "own_tokenizer": false }, { "name": "Wia", "iso_1_code": null, "iso_3_code": "ssj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10108", + "scripts": [], + "own_tokenizer": false }, { "name": "Watiwa", "iso_1_code": null, "iso_3_code": "wtf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10109", + "scripts": [], + "own_tokenizer": false }, { "name": "Koromu", "iso_1_code": null, "iso_3_code": "xes", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10110", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10106", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabenau", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Taupi", "iso_1_code": null, "iso_3_code": "awm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10112", + "scripts": [], + "own_tokenizer": false }, { "name": "Migum", "iso_1_code": null, "iso_3_code": "klm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10113", + "scripts": [], + "own_tokenizer": false }, { "name": "Lemio", "iso_1_code": null, "iso_3_code": "lei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10114", + "scripts": [], + "own_tokenizer": false }, { "name": "Pulabu", "iso_1_code": null, "iso_3_code": "pup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10115", + "scripts": [], + "own_tokenizer": false }, { "name": "Siroi", "iso_1_code": null, "iso_3_code": "ssd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10116", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10111", + "scripts": [], + "own_tokenizer": false }, { "name": "Mindjim", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anjam", "iso_1_code": null, "iso_3_code": "boj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10118", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bongu", "iso_1_code": null, "iso_3_code": "bpu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10119", + "scripts": [], + "own_tokenizer": false }, { "name": "Soq", "iso_1_code": null, "iso_3_code": "mdc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10120", + "scripts": [], + "own_tokenizer": false }, { "name": "Sam", "iso_1_code": null, "iso_3_code": "snx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10121", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10117", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Uyajitaya", "iso_1_code": null, "iso_3_code": "duk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10123", + "scripts": [], + "own_tokenizer": false }, { "name": "Ogea", "iso_1_code": null, "iso_3_code": "eri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10124", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Jilim", "iso_1_code": null, "iso_3_code": "jil", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10125", + "scripts": [], + "own_tokenizer": false }, { "name": "Waube", "iso_1_code": null, "iso_3_code": "kop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10126", + "scripts": [], + "own_tokenizer": false }, { "name": "Rerau", "iso_1_code": null, "iso_3_code": "rea", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10127", + "scripts": [], + "own_tokenizer": false }, { "name": "Uya", "iso_1_code": null, "iso_3_code": "usu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10128", + "scripts": [], + "own_tokenizer": false }, { "name": "Dubuporo", "iso_1_code": null, "iso_3_code": "ynl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10129", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10122", + "scripts": [], + "own_tokenizer": false }, { "name": "Peka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Danaru", "iso_1_code": null, "iso_3_code": "dnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10131", + "scripts": [], + "own_tokenizer": false }, { "name": "Sumau", "iso_1_code": null, "iso_3_code": "six", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10132", + "scripts": [], + "own_tokenizer": false }, { "name": "Kobuka", "iso_1_code": null, "iso_3_code": "urg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10133", + "scripts": [], + "own_tokenizer": false }, { "name": "Sob", "iso_1_code": null, "iso_3_code": "urw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10134", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10130", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaganon", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dumun", "iso_1_code": null, "iso_3_code": "dui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10136", + "scripts": [], + "own_tokenizer": false }, { "name": "Ganglau", "iso_1_code": null, "iso_3_code": "ggl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10137", + "scripts": [], + "own_tokenizer": false }, { "name": "Saep", "iso_1_code": null, "iso_3_code": "spd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10138", + "scripts": [], + "own_tokenizer": false }, { "name": "Yabong", "iso_1_code": null, "iso_3_code": "ybo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10139", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10135", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10101", + "scripts": [], + "own_tokenizer": false }, { "name": "South Adelbert", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Josephstaal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Osum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Utarmbung", "iso_1_code": null, "iso_3_code": "omo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10143", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10142", + "scripts": [], + "own_tokenizer": false }, { "name": "Pomoikan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anamuxra", "iso_1_code": null, "iso_3_code": "imi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10145", + "scripts": [], + "own_tokenizer": false }, { "name": "Moresada", "iso_1_code": null, "iso_3_code": "msx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10146", + "scripts": [], + "own_tokenizer": false }, { "name": "Anam", "iso_1_code": null, "iso_3_code": "pda", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10147", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10144", + "scripts": [], + "own_tokenizer": false }, { "name": "Wadaginam", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wadaginam", "iso_1_code": null, "iso_3_code": "wdg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10149", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10148", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10141", + "scripts": [], + "own_tokenizer": false }, { "name": "Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apal", "iso_1_code": null, "iso_3_code": "ena", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10152", + "scripts": [], + "own_tokenizer": false }, { "name": "Magiyi", "iso_1_code": null, "iso_3_code": "gmg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10153", + "scripts": [], + "own_tokenizer": false }, { "name": "Manat", "iso_1_code": null, "iso_3_code": "pmr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10154", + "scripts": [], + "own_tokenizer": false }, { "name": "North Central Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mum", "iso_1_code": null, "iso_3_code": "kqa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10156", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Sirva", "iso_1_code": null, "iso_3_code": "sbq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10157", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10155", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10151", + "scripts": [], + "own_tokenizer": false }, { "name": "East Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kursav", "iso_1_code": null, "iso_3_code": "faj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10159", + "scripts": [], + "own_tokenizer": false }, { "name": "Gants", "iso_1_code": null, "iso_3_code": "gao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10160", + "scripts": [], + "own_tokenizer": false }, { "name": "Mag\u0268", "iso_1_code": null, "iso_3_code": "gkd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10161", + "scripts": [], + "own_tokenizer": false }, { "name": "Aisi", "iso_1_code": null, "iso_3_code": "mmq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10162", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10158", + "scripts": [], + "own_tokenizer": false }, { "name": "West Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nend", "iso_1_code": null, "iso_3_code": "anh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10164", + "scripts": [], + "own_tokenizer": false }, { "name": "Mand", "iso_1_code": null, "iso_3_code": "ate", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10165", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10163", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10150", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10140", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10019", + "scripts": [], + "own_tokenizer": false }, { "name": "Marind", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Boazi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuni-Boazi", "iso_1_code": null, "iso_3_code": "kvg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10168", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zimakani", "iso_1_code": null, "iso_3_code": "zik", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10169", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10167", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuclear Marind", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Marind, Bian", "iso_1_code": null, "iso_3_code": "bpv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10171", + "scripts": [], + "own_tokenizer": false }, { "name": "Marind", "iso_1_code": null, "iso_3_code": "mrz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10172", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10170", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaqay", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Warkay-Bipim", "iso_1_code": null, "iso_3_code": "bgv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10174", + "scripts": [], + "own_tokenizer": false }, { "name": "Yaqay", "iso_1_code": null, "iso_3_code": "jaq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10175", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10173", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10166", + "scripts": [], + "own_tokenizer": false }, { "name": "Mek", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lik", "iso_1_code": null, "iso_3_code": "eip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10178", + "scripts": [], + "own_tokenizer": false }, { "name": "Yale, Kosarek", "iso_1_code": null, "iso_3_code": "kkl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10179", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Korupun-Sela", "iso_1_code": null, "iso_3_code": "kpq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10180", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Una", "iso_1_code": null, "iso_3_code": "mtg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10181", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nalca", "iso_1_code": null, "iso_3_code": "nlc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10182", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nipsan", "iso_1_code": null, "iso_3_code": "nps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10183", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10177", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ketengban", "iso_1_code": null, "iso_3_code": "xte", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10185", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10184", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10176", + "scripts": [], + "own_tokenizer": false }, { "name": "Mombum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Koneraw", "iso_1_code": null, "iso_3_code": "kdw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10187", + "scripts": [], + "own_tokenizer": false }, { "name": "Mombum", "iso_1_code": null, "iso_3_code": "mso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10188", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10186", + "scripts": [], + "own_tokenizer": false }, { "name": "Mor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mor", "iso_1_code": null, "iso_3_code": "moq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10190", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10189", + "scripts": [], + "own_tokenizer": false }, { "name": "Moraori", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Morori", "iso_1_code": null, "iso_3_code": "mok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10192", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10191", + "scripts": [], + "own_tokenizer": false }, { "name": "Ok-Awyu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awyu-Dumut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awyu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aghu", "iso_1_code": null, "iso_3_code": "ahh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10196", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, South", "iso_1_code": null, "iso_3_code": "aws", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10197", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, Central", "iso_1_code": null, "iso_3_code": "awu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10198", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, Jair", "iso_1_code": null, "iso_3_code": "awv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10199", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, Edera", "iso_1_code": null, "iso_3_code": "awy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10200", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, Asue", "iso_1_code": null, "iso_3_code": "psa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10201", + "scripts": [], + "own_tokenizer": false }, { "name": "Awyu, North", "iso_1_code": null, "iso_3_code": "yir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10202", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10195", + "scripts": [], + "own_tokenizer": false }, { "name": "Dumut", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mandobo Atas", "iso_1_code": null, "iso_3_code": "aax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10204", + "scripts": [], + "own_tokenizer": false }, { "name": "Mandobo Bawah", "iso_1_code": null, "iso_3_code": "bwp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10205", + "scripts": [], + "own_tokenizer": false }, { "name": "Ketum", "iso_1_code": null, "iso_3_code": "ktt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10206", + "scripts": [], + "own_tokenizer": false }, { "name": "Kombai", "iso_1_code": null, "iso_3_code": "tyn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10207", + "scripts": [], + "own_tokenizer": false }, { "name": "Wambon", "iso_1_code": null, "iso_3_code": "wms", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10208", + "scripts": [], + "own_tokenizer": false }, { "name": "Wanggom", "iso_1_code": null, "iso_3_code": "wng", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10209", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10203", + "scripts": [], + "own_tokenizer": false }, { "name": "Korowai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Korowai", "iso_1_code": null, "iso_3_code": "khe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10211", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10210", + "scripts": [], + "own_tokenizer": false }, { "name": "Sawi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sawi", "iso_1_code": null, "iso_3_code": "saw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10213", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10212", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10194", + "scripts": [], + "own_tokenizer": false }, { "name": "Ok", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Lowland", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Morop", "iso_1_code": null, "iso_3_code": "iwo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10216", + "scripts": [], + "own_tokenizer": false }, { "name": "Muyu, North", "iso_1_code": null, "iso_3_code": "kti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10217", + "scripts": [], + "own_tokenizer": false }, { "name": "Muyu, South", "iso_1_code": null, "iso_3_code": "kts", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10218", + "scripts": [], + "own_tokenizer": false }, { "name": "Ninggerum", "iso_1_code": null, "iso_3_code": "nxr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10219", + "scripts": [], + "own_tokenizer": false }, { "name": "Yongkom", "iso_1_code": null, "iso_3_code": "yon", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10220", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10215", + "scripts": [], + "own_tokenizer": false }, { "name": "Mountain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bimin", "iso_1_code": null, "iso_3_code": "bhl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10222", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Faiwol", "iso_1_code": null, "iso_3_code": "fai", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10223", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mian", "iso_1_code": null, "iso_3_code": "mpt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10224", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nakai", "iso_1_code": null, "iso_3_code": "nkj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10225", + "scripts": [], + "own_tokenizer": false }, { "name": "Setaman", "iso_1_code": null, "iso_3_code": "stm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10226", + "scripts": [], + "own_tokenizer": false }, { "name": "Suganga", "iso_1_code": null, "iso_3_code": "sug", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10227", + "scripts": [], + "own_tokenizer": false }, { "name": "Tifal", "iso_1_code": null, "iso_3_code": "tif", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10228", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Telefol", "iso_1_code": null, "iso_3_code": "tlf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10229", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Urapmin", "iso_1_code": null, "iso_3_code": "urm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10230", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10221", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngalum", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tsaukambo", "iso_1_code": null, "iso_3_code": "kvz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10232", + "scripts": [], + "own_tokenizer": false }, { "name": "Komyandaret", "iso_1_code": null, "iso_3_code": "kzv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10233", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngalum", "iso_1_code": null, "iso_3_code": "szb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10234", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10231", + "scripts": [], + "own_tokenizer": false }, { "name": "Tangko", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tangko", "iso_1_code": null, "iso_3_code": "tkx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10236", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10235", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Burumakok", "iso_1_code": null, "iso_3_code": "aip", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10238", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwer", "iso_1_code": null, "iso_3_code": "kwr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10239", + "scripts": [], + "own_tokenizer": false }, { "name": "Kopkaka", "iso_1_code": null, "iso_3_code": "opk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10240", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10237", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10214", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10193", + "scripts": [], + "own_tokenizer": false }, { "name": "Oksapmin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Oksapmin", "iso_1_code": null, "iso_3_code": "opm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10242", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10241", + "scripts": [], + "own_tokenizer": false }, { "name": "Pawaian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pawaia", "iso_1_code": null, "iso_3_code": "pwa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10244", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10243", + "scripts": [], + "own_tokenizer": false }, { "name": "South Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Inanwatan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Duriankere", "iso_1_code": null, "iso_3_code": "dbn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10247", + "scripts": [], + "own_tokenizer": false }, { "name": "Suabo", "iso_1_code": null, "iso_3_code": "szp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10248", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10246", + "scripts": [], + "own_tokenizer": false }, { "name": "Konda-Yahadian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Konda", "iso_1_code": null, "iso_3_code": "knd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10250", + "scripts": [], + "own_tokenizer": false }, { "name": "Yahadian", "iso_1_code": null, "iso_3_code": "ner", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10251", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10249", + "scripts": [], + "own_tokenizer": false }, { "name": "South Bird\u2019s Head Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kokoda", "iso_1_code": null, "iso_3_code": "xod", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10254", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10253", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kemberano", "iso_1_code": null, "iso_3_code": "bzp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10256", + "scripts": [], + "own_tokenizer": false }, { "name": "Arandai", "iso_1_code": null, "iso_3_code": "jbj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10257", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10255", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kais", "iso_1_code": null, "iso_3_code": "kzm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10259", + "scripts": [], + "own_tokenizer": false }, { "name": "Puragi", "iso_1_code": null, "iso_3_code": "pru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10260", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaburi", "iso_1_code": null, "iso_3_code": "uka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10261", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10258", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10252", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10245", + "scripts": [], + "own_tokenizer": false }, { "name": "Southeast Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dagan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Daga", "iso_1_code": null, "iso_3_code": "dgz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10264", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Umanakaina", "iso_1_code": null, "iso_3_code": "gdn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10265", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ginuman", "iso_1_code": null, "iso_3_code": "gnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10266", + "scripts": [], + "own_tokenizer": false }, { "name": "Dima", "iso_1_code": null, "iso_3_code": "jma", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10267", + "scripts": [], + "own_tokenizer": false }, { "name": "Mapena", "iso_1_code": null, "iso_3_code": "mnm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10268", + "scripts": [], + "own_tokenizer": false }, { "name": "Maiwa", "iso_1_code": null, "iso_3_code": "mti", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10269", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Onjob", "iso_1_code": null, "iso_3_code": "onj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10270", + "scripts": [], + "own_tokenizer": false }, { "name": "Kanasi", "iso_1_code": null, "iso_3_code": "soq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10271", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Turaka", "iso_1_code": null, "iso_3_code": "trh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10272", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10263", + "scripts": [], + "own_tokenizer": false }, { "name": "Goilalan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fuyug", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fuyug", "iso_1_code": null, "iso_3_code": "fuy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10275", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10274", + "scripts": [], + "own_tokenizer": false }, { "name": "Kunimaipa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biangai", "iso_1_code": null, "iso_3_code": "big", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10277", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kunimaipa", "iso_1_code": null, "iso_3_code": "kup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10278", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tauade", "iso_1_code": null, "iso_3_code": "ttd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10279", + "scripts": [], + "own_tokenizer": false }, { "name": "Amam", "iso_1_code": null, "iso_3_code": "wer", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10280", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10276", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10273", + "scripts": [], + "own_tokenizer": false }, { "name": "Koiarian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baraic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "\u00d6mie", "iso_1_code": null, "iso_3_code": "aom", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10283", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Barai", "iso_1_code": null, "iso_3_code": "bbb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10284", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ese", "iso_1_code": null, "iso_3_code": "mcq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10285", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Namiae", "iso_1_code": null, "iso_3_code": "nvm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10286", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10282", + "scripts": [], + "own_tokenizer": false }, { "name": "Koiaric", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Biage", "iso_1_code": null, "iso_3_code": "bdf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10288", + "scripts": [], + "own_tokenizer": false }, { "name": "Koiari", "iso_1_code": null, "iso_3_code": "kbk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10289", + "scripts": [], + "own_tokenizer": false }, { "name": "Koiali, Mountain", "iso_1_code": null, "iso_3_code": "kpx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10290", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Koita", "iso_1_code": null, "iso_3_code": "kqi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10291", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10287", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10281", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwalean", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Humene", "iso_1_code": null, "iso_3_code": "huf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10293", + "scripts": [], + "own_tokenizer": false }, { "name": "Uare", "iso_1_code": null, "iso_3_code": "ksj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10294", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mulaha", "iso_1_code": null, "iso_3_code": "mfw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10295", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10292", + "scripts": [], + "own_tokenizer": false }, { "name": "Mailuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bauwaki", "iso_1_code": null, "iso_3_code": "bwk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10297", + "scripts": [], + "own_tokenizer": false }, { "name": "Binahari", "iso_1_code": null, "iso_3_code": "bxz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10298", + "scripts": [], + "own_tokenizer": false }, { "name": "Domu", "iso_1_code": null, "iso_3_code": "dof", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10299", + "scripts": [], + "own_tokenizer": false }, { "name": "Laua", "iso_1_code": null, "iso_3_code": "luf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10300", + "scripts": [], + "own_tokenizer": false }, { "name": "Magi", "iso_1_code": null, "iso_3_code": "mgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10301", + "scripts": [], + "own_tokenizer": false }, { "name": "Morawa", "iso_1_code": null, "iso_3_code": "mze", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10302", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10296", + "scripts": [], + "own_tokenizer": false }, { "name": "Manubaran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Doromu-Koki", "iso_1_code": null, "iso_3_code": "kqc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10304", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maria", "iso_1_code": null, "iso_3_code": "mds", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10305", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10303", + "scripts": [], + "own_tokenizer": false }, { "name": "Yareban", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aneme Wake", "iso_1_code": null, "iso_3_code": "aby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10307", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bariji", "iso_1_code": null, "iso_3_code": "bjc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10308", + "scripts": [], + "own_tokenizer": false }, { "name": "Moikodi", "iso_1_code": null, "iso_3_code": "mkp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10309", + "scripts": [], + "own_tokenizer": false }, { "name": "Nawaru", "iso_1_code": null, "iso_3_code": "nwr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10310", + "scripts": [], + "own_tokenizer": false }, { "name": "Yareba", "iso_1_code": null, "iso_3_code": "yrb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10311", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10306", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10262", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanah Merah", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tanahmerah", "iso_1_code": null, "iso_3_code": "tcm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10313", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10312", + "scripts": [], + "own_tokenizer": false }, { "name": "Teberan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dadibi", "iso_1_code": null, "iso_3_code": "mps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10315", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Folopa", "iso_1_code": null, "iso_3_code": "ppo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10316", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10314", + "scripts": [], + "own_tokenizer": false }, { "name": "Tirio", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abom", "iso_1_code": null, "iso_3_code": "aob", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10318", + "scripts": [], + "own_tokenizer": false }, { "name": "Makayam", "iso_1_code": null, "iso_3_code": "aup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10319", + "scripts": [], + "own_tokenizer": false }, { "name": "Baramu", "iso_1_code": null, "iso_3_code": "bmz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10320", + "scripts": [], + "own_tokenizer": false }, { "name": "Bitur", "iso_1_code": null, "iso_3_code": "mcc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10321", + "scripts": [], + "own_tokenizer": false }, { "name": "Kiunum", "iso_1_code": null, "iso_3_code": "wei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10322", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10317", + "scripts": [], + "own_tokenizer": false }, { "name": "Turama-Kikorian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kairi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Rumu", "iso_1_code": null, "iso_3_code": "klq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10325", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10324", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaser", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Barikewa", "iso_1_code": null, "iso_3_code": "jbk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10327", + "scripts": [], + "own_tokenizer": false }, { "name": "Mouwase", "iso_1_code": null, "iso_3_code": "jmw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10328", + "scripts": [], + "own_tokenizer": false }, { "name": "Ikobi", "iso_1_code": null, "iso_3_code": "meb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10329", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10326", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10323", + "scripts": [], + "own_tokenizer": false }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dani Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Dani, Upper Grand Valley", "iso_1_code": null, "iso_3_code": "dna", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10333", + "scripts": [], + "own_tokenizer": false }, { "name": "Dani, Lower Grand Valley", "iso_1_code": null, "iso_3_code": "dni", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10334", + "scripts": [], + "own_tokenizer": false }, { "name": "Dani, Mid Grand Valley", "iso_1_code": null, "iso_3_code": "dnt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10335", + "scripts": [], + "own_tokenizer": false }, { "name": "Dani, Western", "iso_1_code": null, "iso_3_code": "dnw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10336", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Hupla", "iso_1_code": null, "iso_3_code": "hap", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10337", + "scripts": [], + "own_tokenizer": false }, { "name": "Nggem", "iso_1_code": null, "iso_3_code": "nbq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10338", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Walak", "iso_1_code": null, "iso_3_code": "wlw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10339", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10332", + "scripts": [], + "own_tokenizer": false }, { "name": "Ngalik", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nduga", "iso_1_code": null, "iso_3_code": "ndx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10341", + "scripts": [], + "own_tokenizer": false }, { "name": "Yali, Ninia", "iso_1_code": null, "iso_3_code": "nlk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10342", + "scripts": [], + "own_tokenizer": false }, { "name": "Silimo", "iso_1_code": null, "iso_3_code": "wul", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10343", + "scripts": [], + "own_tokenizer": false }, { "name": "Yali, Pass Valley", "iso_1_code": null, "iso_3_code": "yac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10344", + "scripts": [], + "own_tokenizer": false }, { "name": "Yali, Angguruk", "iso_1_code": null, "iso_3_code": "yli", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10345", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10340", + "scripts": [], + "own_tokenizer": false }, { "name": "Wano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wano", "iso_1_code": null, "iso_3_code": "wno", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10347", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10346", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10331", + "scripts": [], + "own_tokenizer": false }, { "name": "Timor-Alor-Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wersing", "iso_1_code": null, "iso_3_code": "kvw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10349", + "scripts": [], + "own_tokenizer": false }, { "name": "Oirata", "iso_1_code": null, "iso_3_code": "oia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10350", + "scripts": [], + "own_tokenizer": false }, { "name": "Alor-Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Abui", "iso_1_code": null, "iso_3_code": "abz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10353", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Adang", "iso_1_code": null, "iso_3_code": "adn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10354", + "scripts": [], + "own_tokenizer": false }, { "name": "Hamap", "iso_1_code": null, "iso_3_code": "hmu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10355", + "scripts": [], + "own_tokenizer": false }, { "name": "Kabola", "iso_1_code": null, "iso_3_code": "klz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10356", + "scripts": [], + "own_tokenizer": false }, { "name": "Kafoa", "iso_1_code": null, "iso_3_code": "kpu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10357", + "scripts": [], + "own_tokenizer": false }, { "name": "Kui", "iso_1_code": null, "iso_3_code": "kvd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10358", + "scripts": [], + "own_tokenizer": false }, { "name": "Klon", "iso_1_code": null, "iso_3_code": "kyo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10359", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamang", "iso_1_code": null, "iso_3_code": "woi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10360", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10352", + "scripts": [], + "own_tokenizer": false }, { "name": "Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Blagar", "iso_1_code": null, "iso_3_code": "beu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10362", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaera", "iso_1_code": null, "iso_3_code": "jka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10363", + "scripts": [], + "own_tokenizer": false }, { "name": "Pantar, Western", "iso_1_code": null, "iso_3_code": "lev", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10364", + "scripts": [], + "own_tokenizer": false }, { "name": "Nedebang", "iso_1_code": null, "iso_3_code": "nec", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10365", + "scripts": [], + "own_tokenizer": false }, { "name": "Reta", "iso_1_code": null, "iso_3_code": "ret", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10366", + "scripts": [], + "own_tokenizer": false }, { "name": "Teiwa", "iso_1_code": null, "iso_3_code": "twe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10367", + "scripts": [], + "own_tokenizer": false }, { "name": "Tereweng", "iso_1_code": null, "iso_3_code": "twg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10368", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10361", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10351", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanglapui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Sawila", "iso_1_code": null, "iso_3_code": "swt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10370", + "scripts": [], + "own_tokenizer": false }, { "name": "Kula", "iso_1_code": null, "iso_3_code": "tpg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10371", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10369", + "scripts": [], + "own_tokenizer": false }, { "name": "Timor", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bunak", "iso_1_code": null, "iso_3_code": "bfn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10373", + "scripts": [], + "own_tokenizer": false }, { "name": "Fataluku", "iso_1_code": null, "iso_3_code": "ddg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10374", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Makalero", "iso_1_code": null, "iso_3_code": "mjb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10375", + "scripts": [], + "own_tokenizer": false }, { "name": "Makasae", "iso_1_code": null, "iso_3_code": "mkz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10376", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10372", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10348", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karas", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karas", "iso_1_code": null, "iso_3_code": "kgv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10379", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10378", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bomberai Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Baham", "iso_1_code": null, "iso_3_code": "bdw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10381", + "scripts": [], + "own_tokenizer": false }, { "name": "Iha", "iso_1_code": null, "iso_3_code": "ihp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10382", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10380", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10377", + "scripts": [], + "own_tokenizer": false }, { "name": "Wissel Lakes", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Auye", "iso_1_code": null, "iso_3_code": "auu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10384", + "scripts": [], + "own_tokenizer": false }, { "name": "Moi", "iso_1_code": null, "iso_3_code": "daz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10385", + "scripts": [], + "own_tokenizer": false }, { "name": "Ekari", "iso_1_code": null, "iso_3_code": "ekg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10386", + "scripts": [], + "own_tokenizer": false }, { "name": "Moni", "iso_1_code": null, "iso_3_code": "mnz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10387", + "scripts": [], + "own_tokenizer": false }, { "name": "Wolani", "iso_1_code": null, "iso_3_code": "wod", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10388", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10383", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10330", + "scripts": [], + "own_tokenizer": false }, { "name": "West Kutubu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Fasu", "iso_1_code": null, "iso_3_code": "faa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10390", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10389", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiru", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Witu", "iso_1_code": null, "iso_3_code": "wiu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10392", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10391", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "9723", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tsimshian.json b/data/Tsimshian.json index ce997646cb901e9b84ddd3b9e03a79aa938a5051..3cbd01a3be4bacb64b2158cf175bcfb72278eb77 100644 --- a/data/Tsimshian.json +++ b/data/Tsimshian.json @@ -2,41 +2,51 @@ "name": "Tsimshian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tsimshian", "iso_1_code": null, "iso_3_code": "tsi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10394", + "scripts": [], + "own_tokenizer": false }, { "name": "Nass-Gitksan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gitxsan", "iso_1_code": null, "iso_3_code": "git", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10396", + "scripts": [], + "own_tokenizer": false }, { "name": "Nisga\u2019a", "iso_1_code": null, "iso_3_code": "ncg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10397", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10395", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10393", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tucanoan.json b/data/Tucanoan.json index 97085c5717261ff029df12cf8afcae56471808c0..b29666e50fb0787377b4dc3a8f0f94b61a1b3462 100644 --- a/data/Tucanoan.json +++ b/data/Tucanoan.json @@ -2,280 +2,380 @@ "name": "Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Miriti", "iso_1_code": null, "iso_3_code": "mmv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10399", + "scripts": [], + "own_tokenizer": false }, { "name": "Central Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cubeo", "iso_1_code": null, "iso_3_code": "cub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10401", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10400", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arapaso", "iso_1_code": null, "iso_3_code": "arj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10403", + "scripts": [], + "own_tokenizer": false }, { "name": "Wanano", "iso_1_code": null, "iso_3_code": "gvc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10404", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Macuna", "iso_1_code": null, "iso_3_code": "myy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10405", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Piratapuyo", "iso_1_code": null, "iso_3_code": "pir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10406", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bar\u00e1-Tuyuka", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Waimaha", "iso_1_code": null, "iso_3_code": "bao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10408", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Barasana-Eduria", "iso_1_code": null, "iso_3_code": "bsn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10409", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pokang\u00e1", "iso_1_code": null, "iso_3_code": "pok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10410", + "scripts": [], + "own_tokenizer": false }, { "name": "Tuyuca", "iso_1_code": null, "iso_3_code": "tue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10411", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10407", + "scripts": [], + "own_tokenizer": false }, { "name": "Carapano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Carapana", "iso_1_code": null, "iso_3_code": "cbc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10413", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tatuyo", "iso_1_code": null, "iso_3_code": "tav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10414", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10412", + "scripts": [], + "own_tokenizer": false }, { "name": "Desano-Siriano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Desano", "iso_1_code": null, "iso_3_code": "des", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10416", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Siriano", "iso_1_code": null, "iso_3_code": "sri", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10417", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10415", + "scripts": [], + "own_tokenizer": false }, { "name": "Tucano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tucano", "iso_1_code": null, "iso_3_code": "tuo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10419", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wajiara", "iso_1_code": null, "iso_3_code": "yui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10420", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10418", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10402", + "scripts": [], + "own_tokenizer": false }, { "name": "Western Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Koreguaje", "iso_1_code": null, "iso_3_code": "coe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10422", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Maijuna", "iso_1_code": null, "iso_3_code": "ore", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10423", + "scripts": [], + "own_tokenizer": false }, { "name": "Tetete", "iso_1_code": null, "iso_3_code": "teb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10424", + "scripts": [], + "own_tokenizer": false }, { "name": "Tama", "iso_1_code": null, "iso_3_code": "ten", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10425", + "scripts": [], + "own_tokenizer": false }, { "name": "Tanimuca-Letuama", "iso_1_code": null, "iso_3_code": "tnc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10426", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yahuna", "iso_1_code": null, "iso_3_code": "ynu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10427", + "scripts": [], + "own_tokenizer": false }, { "name": "Macaguaje", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Macaguaje", "iso_1_code": null, "iso_3_code": "mcl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10429", + "scripts": [], + "own_tokenizer": false }, { "name": "Paicoca", "iso_1_code": null, "iso_3_code": "sey", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10430", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Siona", "iso_1_code": null, "iso_3_code": "snn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10431", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10428", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10421", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10398", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tungusic.json b/data/Tungusic.json index 3fe15806611abc5ee6b540eecdce3816d629b415..e3d1ba5f0d05583bf8cc22728be1ef1ea1539e55 100644 --- a/data/Tungusic.json +++ b/data/Tungusic.json @@ -2,185 +2,233 @@ "name": "Tungusic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Even", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Even", "iso_1_code": null, "iso_3_code": "eve", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10435", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10434", + "scripts": [], + "own_tokenizer": false }, { "name": "Evenki", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Evenki", "iso_1_code": null, "iso_3_code": "evn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10437", + "scripts": [], + "own_tokenizer": false }, { "name": "Oroqen", "iso_1_code": null, "iso_3_code": "orh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10438", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10436", + "scripts": [], + "own_tokenizer": false }, { "name": "Negidal", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Negidal", "iso_1_code": null, "iso_3_code": "neg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10440", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10439", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10433", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nanaj", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nanai", "iso_1_code": null, "iso_3_code": "gld", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10444", + "scripts": [], + "own_tokenizer": false }, { "name": "Orok", "iso_1_code": null, "iso_3_code": "oaa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10445", + "scripts": [], + "own_tokenizer": false }, { "name": "Ulch", "iso_1_code": null, "iso_3_code": "ulc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10446", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10443", + "scripts": [], + "own_tokenizer": false }, { "name": "Udihe", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Oroch", "iso_1_code": null, "iso_3_code": "oac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10448", + "scripts": [], + "own_tokenizer": false }, { "name": "Udihe", "iso_1_code": null, "iso_3_code": "ude", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10449", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10447", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10442", + "scripts": [], + "own_tokenizer": false }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jurchen", "iso_1_code": null, "iso_3_code": "juc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10451", + "scripts": [], + "own_tokenizer": false }, { "name": "Manchu", "iso_1_code": null, "iso_3_code": "mnc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10452", + "scripts": [], + "own_tokenizer": false }, { "name": "Xibe", "iso_1_code": null, "iso_3_code": "sjo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10453", + "scripts": [ + "Mong" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10450", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10441", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10432", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tupian.json b/data/Tupian.json index 44fc37d2ffa6edbd8d94563a8da1d31956b91413..92955d5c7d64a8095fc31c7ad46fff67143a0644 100644 --- a/data/Tupian.json +++ b/data/Tupian.json @@ -2,841 +2,1087 @@ "name": "Tupian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Awet\u00ed", "iso_1_code": null, "iso_3_code": "awe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10455", + "scripts": [], + "own_tokenizer": false }, { "name": "Kamayur\u00e1", "iso_1_code": null, "iso_3_code": "kay", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10456", + "scripts": [], + "own_tokenizer": false }, { "name": "Sater\u00e9-Maw\u00e9", "iso_1_code": null, "iso_3_code": "mav", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10457", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Zo\u2019\u00e9", "iso_1_code": null, "iso_3_code": "pto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10458", + "scripts": [], + "own_tokenizer": false }, { "name": "Purubor\u00e1", "iso_1_code": null, "iso_3_code": "pur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10459", + "scripts": [], + "own_tokenizer": false }, { "name": "Arikem", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arikem", "iso_1_code": null, "iso_3_code": "ait", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10461", + "scripts": [], + "own_tokenizer": false }, { "name": "Kariti\u00e2na", "iso_1_code": null, "iso_3_code": "ktn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10462", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10460", + "scripts": [], + "own_tokenizer": false }, { "name": "Juruna", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jur\u00fana", "iso_1_code": null, "iso_3_code": "jur", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10464", + "scripts": [], + "own_tokenizer": false }, { "name": "Maritsau\u00e1", "iso_1_code": null, "iso_3_code": "msp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10465", + "scripts": [], + "own_tokenizer": false }, { "name": "Xipaya", "iso_1_code": null, "iso_3_code": "xiy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10466", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10463", + "scripts": [], + "own_tokenizer": false }, { "name": "Mond\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mond\u00e9", "iso_1_code": null, "iso_3_code": "mnd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10468", + "scripts": [], + "own_tokenizer": false }, { "name": "Suru\u00ed", "iso_1_code": null, "iso_3_code": "sru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10469", + "scripts": [], + "own_tokenizer": false }, { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": "arx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10471", + "scripts": [], + "own_tokenizer": false }, { "name": "Cinta Larga", "iso_1_code": null, "iso_3_code": "cin", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10472", + "scripts": [], + "own_tokenizer": false }, { "name": "Gavi\u00e3o do Jiparan\u00e1", "iso_1_code": null, "iso_3_code": "gvo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10473", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10470", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10467", + "scripts": [], + "own_tokenizer": false }, { "name": "Munduruk\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kuru\u00e1ya", "iso_1_code": null, "iso_3_code": "kyr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10475", + "scripts": [], + "own_tokenizer": false }, { "name": "Munduruk\u00fa", "iso_1_code": null, "iso_3_code": "myu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10476", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10474", + "scripts": [], + "own_tokenizer": false }, { "name": "Ramarama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Karo", "iso_1_code": null, "iso_3_code": "arr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10478", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Urumi", "iso_1_code": null, "iso_3_code": "uru", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10479", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10477", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupar\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Akuntsu", "iso_1_code": null, "iso_3_code": "aqz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10481", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kepkiriw\u00e1t", "iso_1_code": null, "iso_3_code": "kpn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10482", + "scripts": [], + "own_tokenizer": false }, { "name": "Makur\u00e1p", "iso_1_code": null, "iso_3_code": "mpu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10483", + "scripts": [], + "own_tokenizer": false }, { "name": "Sakirabi\u00e1", "iso_1_code": null, "iso_3_code": "skf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10484", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupar\u00ed", "iso_1_code": null, "iso_3_code": "tpr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10485", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayor\u00f3", "iso_1_code": null, "iso_3_code": "wyr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10486", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10480", + "scripts": [], + "own_tokenizer": false }, { "name": "Tup\u00ed-Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Arawet\u00e9", "iso_1_code": null, "iso_3_code": "awt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10488", + "scripts": [], + "own_tokenizer": false }, { "name": "Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ach\u00e9", "iso_1_code": null, "iso_3_code": "guq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10490", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Xet\u00e1", "iso_1_code": null, "iso_3_code": "xet", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10491", + "scripts": [], + "own_tokenizer": false }, { "name": "Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guaran\u00ed, Paraguayan", "iso_1_code": "gn", "iso_3_code": "gug", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10493", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guaran\u00ed, Mby\u00e1", "iso_1_code": "gn", "iso_3_code": "gun", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10494", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Kaiw\u00e1", "iso_1_code": null, "iso_3_code": "kgk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10495", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guaran\u00ed, Ava", "iso_1_code": "gn", "iso_3_code": "nhd", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10496", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pai Tavytera", "iso_1_code": null, "iso_3_code": "pta", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10497", + "scripts": [], + "own_tokenizer": false }, { "name": "\u00d1andeva", "iso_1_code": null, "iso_3_code": "tpj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10498", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolivian Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guaran\u00ed, Western Bolivian", "iso_1_code": "gn", "iso_3_code": "gnw", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10500", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Guaran\u00ed, Eastern Bolivian", "iso_1_code": "gn", "iso_3_code": "gui", - "tokenizer": null, - "source": "macrolanguage", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10501", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10499", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10492", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10489", + "scripts": [], + "own_tokenizer": false }, { "name": "Guaray\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guarayu", "iso_1_code": null, "iso_3_code": "gyr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10503", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pauserna", "iso_1_code": null, "iso_3_code": "psm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10504", + "scripts": [], + "own_tokenizer": false }, { "name": "Sirion\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Jor\u00e1", "iso_1_code": null, "iso_3_code": "jor", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10506", + "scripts": [], + "own_tokenizer": false }, { "name": "Sirion\u00f3", "iso_1_code": null, "iso_3_code": "srq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10507", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yuqui", "iso_1_code": null, "iso_3_code": "yuq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10508", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10505", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10502", + "scripts": [], + "own_tokenizer": false }, { "name": "Kawahib", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Apiak\u00e1", "iso_1_code": null, "iso_3_code": "api", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10510", + "scripts": [], + "own_tokenizer": false }, { "name": "Uru-Pa-In", "iso_1_code": null, "iso_3_code": "urp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10511", + "scripts": [], + "own_tokenizer": false }, { "name": "Uru-Eu-Wau-Wau", "iso_1_code": null, "iso_3_code": "urz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10512", + "scripts": [], + "own_tokenizer": false }, { "name": "Parintintin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Amundava", "iso_1_code": null, "iso_3_code": "adw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10514", + "scripts": [], + "own_tokenizer": false }, { "name": "J\u00fama", "iso_1_code": null, "iso_3_code": "jua", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10515", + "scripts": [], + "own_tokenizer": false }, { "name": "Karipuna", "iso_1_code": null, "iso_3_code": "kuq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10516", + "scripts": [], + "own_tokenizer": false }, { "name": "Paranaw\u00e1t", "iso_1_code": null, "iso_3_code": "paf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10517", + "scripts": [], + "own_tokenizer": false }, { "name": "Tenharim", "iso_1_code": null, "iso_3_code": "pah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10518", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tukumanf\u00e9d", "iso_1_code": null, "iso_3_code": "tkf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10519", + "scripts": [], + "own_tokenizer": false }, { "name": "Wiraf\u00e9d", "iso_1_code": null, "iso_3_code": "wir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10520", + "scripts": [], + "own_tokenizer": false }, { "name": "Morerebi", "iso_1_code": null, "iso_3_code": "xmo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10521", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10513", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10509", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayab\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asurini of Xing\u00fa", "iso_1_code": null, "iso_3_code": "asn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10523", + "scripts": [], + "own_tokenizer": false }, { "name": "Kayab\u00ed", "iso_1_code": null, "iso_3_code": "kyz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10524", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10522", + "scripts": [], + "own_tokenizer": false }, { "name": "Tenetehara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Av\u00e1-Canoeiro", "iso_1_code": null, "iso_3_code": "avv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10526", + "scripts": [], + "own_tokenizer": false }, { "name": "Tapirap\u00e9", "iso_1_code": null, "iso_3_code": "taf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10527", + "scripts": [], + "own_tokenizer": false }, { "name": "Akwawa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Asurini, Tocantins", "iso_1_code": null, "iso_3_code": "asu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10529", + "scripts": [], + "own_tokenizer": false }, { "name": "Suru\u00ed do Par\u00e1", "iso_1_code": null, "iso_3_code": "mdz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10530", + "scripts": [], + "own_tokenizer": false }, { "name": "Parakan\u00e3", "iso_1_code": null, "iso_3_code": "pak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10531", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10528", + "scripts": [], + "own_tokenizer": false }, { "name": "Tenetehara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Guajaj\u00e1ra", "iso_1_code": null, "iso_3_code": "gub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10533", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Temb\u00e9", "iso_1_code": null, "iso_3_code": "tqb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10534", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10532", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10525", + "scripts": [], + "own_tokenizer": false }, { "name": "Tup\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cocama", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Kukama-Kukamiria", "iso_1_code": null, "iso_3_code": "cod", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10537", + "scripts": [], + "own_tokenizer": false }, { "name": "Omagua", "iso_1_code": null, "iso_3_code": "omg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10538", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10536", + "scripts": [], + "own_tokenizer": false }, { "name": "Tup\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Potigu\u00e1ra", "iso_1_code": null, "iso_3_code": "pog", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10540", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupinikin", "iso_1_code": null, "iso_3_code": "tpk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10541", + "scripts": [], + "own_tokenizer": false }, { "name": "Tupinamb\u00e1", "iso_1_code": null, "iso_3_code": "tpn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10542", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nhengatu", "iso_1_code": null, "iso_3_code": "yrl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10543", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10539", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10535", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayamp\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Aur\u00e1", "iso_1_code": null, "iso_3_code": "aux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10545", + "scripts": [], + "own_tokenizer": false }, { "name": "Amanay\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anamb\u00e9", "iso_1_code": null, "iso_3_code": "aan", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10547", + "scripts": [], + "own_tokenizer": false }, { "name": "Amanay\u00e9", "iso_1_code": null, "iso_3_code": "ama", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10548", + "scripts": [], + "own_tokenizer": false }, { "name": "Guaj\u00e1", "iso_1_code": null, "iso_3_code": "gvj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10549", + "scripts": [], + "own_tokenizer": false }, { "name": "Turiw\u00e1ra", "iso_1_code": null, "iso_3_code": "twt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10550", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaapor", "iso_1_code": null, "iso_3_code": "urb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10551", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ararandew\u00e1ra", "iso_1_code": null, "iso_3_code": "xaj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10552", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10546", + "scripts": [], + "own_tokenizer": false }, { "name": "Wayamp\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tek\u00f3", "iso_1_code": null, "iso_3_code": "eme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10554", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Wayampi", "iso_1_code": null, "iso_3_code": "oym", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10555", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10553", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10544", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10487", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10454", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Turkic.json b/data/Turkic.json index 97603287dba18f663db736dab3f981e8bd5aa0a7..b5df544f7a7691b28c4f2a7363cc620b31857f1a 100644 --- a/data/Turkic.json +++ b/data/Turkic.json @@ -2,594 +2,1463 @@ "name": "Turkic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Urum", "iso_1_code": null, "iso_3_code": "uum", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10557", + "scripts": [], + "own_tokenizer": false }, { "name": "Bolgar", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Chuvash", "iso_1_code": "cv", "iso_3_code": "chv", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10559", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10558", + "scripts": [], + "own_tokenizer": false }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Ainu", "iso_1_code": null, "iso_3_code": "aib", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10561", + "scripts": [], + "own_tokenizer": false }, { "name": "Chagatai", "iso_1_code": null, "iso_3_code": "chg", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10562", + "scripts": [], + "own_tokenizer": false }, { "name": "Ili Turki", "iso_1_code": null, "iso_3_code": "ili", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10563", + "scripts": [], + "own_tokenizer": false }, { "name": "Uyghur", "iso_1_code": "ug", "iso_3_code": "uig", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10564", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Uzbek, Northern", "iso_1_code": "uz", "iso_3_code": "uzn", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10565", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Uzbek, Southern", "iso_1_code": "uz", "iso_3_code": "uzs", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" + "tokenizers": { + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "scripts": [ + "Latn", + "Arab", + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10566", + "scripts": [ + "Arab" + ], + "own_tokenizer": false }, { "name": "Yugur, West", "iso_1_code": null, "iso_3_code": "ybe", - "tokenizer": { - "name": "uyghur", - "tokenizer": "StanzaTokenizer(\"ug\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10567", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10560", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Altai, Southern", "iso_1_code": null, "iso_3_code": "alt", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10569", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Altai, Northern", "iso_1_code": null, "iso_3_code": "atv", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10570", + "scripts": [], + "own_tokenizer": false }, { "name": "Shor", "iso_1_code": null, "iso_3_code": "cjs", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10571", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Dolgan", "iso_1_code": null, "iso_3_code": "dlg", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10572", + "scripts": [], + "own_tokenizer": false }, { "name": "Karagas", "iso_1_code": null, "iso_3_code": "kim", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10573", + "scripts": [], + "own_tokenizer": false }, { "name": "Khakas", "iso_1_code": null, "iso_3_code": "kjh", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10574", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Yakut", "iso_1_code": null, "iso_3_code": "sah", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10575", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Tuvan", "iso_1_code": null, "iso_3_code": "tyv", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10576", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10568", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Crimean Tatar", "iso_1_code": null, "iso_3_code": "crh", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10578", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Khalaj", "iso_1_code": null, "iso_3_code": "klj", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10579", + "scripts": [], + "own_tokenizer": false }, { "name": "Kashkay", "iso_1_code": null, "iso_3_code": "qxq", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10580", + "scripts": [], + "own_tokenizer": false }, { "name": "Salar", "iso_1_code": null, "iso_3_code": "slr", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10581", + "scripts": [], + "own_tokenizer": false }, { "name": "Azerbaijani", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Azerbaijani, South", "iso_1_code": "az", "iso_3_code": "azb", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "10583", + "scripts": [ + "Arab" + ], + "own_tokenizer": true }, { "name": "Azerbaijani, North", "iso_1_code": "az", "iso_3_code": "azj", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "10584", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": true } - ] + ], + "node_i": "10582", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkish", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "turkish", - "tokenizer": "SpaCyTokenizer(\"tr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Balkan Gagauz Turkish", "iso_1_code": null, "iso_3_code": "bgx", - "tokenizer": { - "name": "turkish", - "tokenizer": "SpaCyTokenizer(\"tr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10586", + "scripts": [], + "own_tokenizer": false }, { "name": "Gagauz", "iso_1_code": null, "iso_3_code": "gag", - "tokenizer": { - "name": "turkish", - "tokenizer": "SpaCyTokenizer(\"tr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10587", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Khorasani Turkish", "iso_1_code": null, "iso_3_code": "kmz", - "tokenizer": { - "name": "turkish", - "tokenizer": "SpaCyTokenizer(\"tr\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10588", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkish", "iso_1_code": "tr", "iso_3_code": "tur", - "tokenizer": { - "name": "turkish", - "tokenizer": "SpaCyTokenizer(\"tr\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10589", + "scripts": [ + "Latn" + ], + "own_tokenizer": true } - ] + ], + "node_i": "10585", + "scripts": [], + "own_tokenizer": false }, { "name": "Turkmenian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Turkmen", "iso_1_code": "tk", "iso_3_code": "tuk", - "tokenizer": { - "name": "azerbaijani", - "tokenizer": "SpaCyTokenizer(\"az\")" + "tokenizers": { + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10591", + "scripts": [ + "Latn", + "Cyrl", + "Arab" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10590", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10577", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Aralo-Caspian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Karakalpak", "iso_1_code": null, "iso_3_code": "kaa", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10594", + "scripts": [ + "Cyrl", + "Latn" + ], + "own_tokenizer": false }, { "name": "Kazakh", "iso_1_code": "kk", "iso_3_code": "kaz", - "tokenizer": { - "name": "kazakh", - "tokenizer": "StanzaTokenizer(\"kk\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10595", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Kyrgyz", "iso_1_code": "ky", "iso_3_code": "kir", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ky\")", + "original_lang_name": "kirghiz", + "original_lang_code": "kir", + "scripts": [ + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10596", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true }, { "name": "Nogai", "iso_1_code": null, "iso_3_code": "nog", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10597", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": "qwm", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10598", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10593", + "scripts": [], + "own_tokenizer": false }, { "name": "Ponto-Caspian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "down", "children": [ { "name": "Krimchak", "iso_1_code": null, "iso_3_code": "jct", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10600", + "scripts": [], + "own_tokenizer": false }, { "name": "Karaim", "iso_1_code": null, "iso_3_code": "kdr", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10601", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Karachay-Balkar", "iso_1_code": null, "iso_3_code": "krc", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10602", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Kumyk", "iso_1_code": null, "iso_3_code": "kum", - "tokenizer": { - "name": "kirghiz", - "tokenizer": "SpaCyTokenizer(\"ky\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10603", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10599", + "scripts": [], + "own_tokenizer": false }, { "name": "Uralian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "tatar", - "tokenizer": "SpaCyTokenizer(\"tt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Arab": { + "full_object": "SpaCyTokenizer(\"az\")", + "original_lang_name": "azerbaijani", + "original_lang_code": "aze", + "scripts": [ + "Arab", + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "bottom", "children": [ { "name": "Bashkort", "iso_1_code": "ba", "iso_3_code": "bak", - "tokenizer": { - "name": "tatar", - "tokenizer": "SpaCyTokenizer(\"tt\")" + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10605", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Chulym", "iso_1_code": null, "iso_3_code": "clw", - "tokenizer": { - "name": "tatar", - "tokenizer": "SpaCyTokenizer(\"tt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10606", + "scripts": [], + "own_tokenizer": false }, { "name": "Siberian Tatar", "iso_1_code": null, "iso_3_code": "sty", - "tokenizer": { - "name": "tatar", - "tokenizer": "SpaCyTokenizer(\"tt\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10607", + "scripts": [], + "own_tokenizer": false }, { "name": "Tatar", "iso_1_code": "tt", "iso_3_code": "tat", - "tokenizer": { - "name": "tatar", - "tokenizer": "SpaCyTokenizer(\"tt\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "scripts": [ + "Latn", + "Cyrl" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10608", + "scripts": [ + "Latn", + "Cyrl" + ], + "own_tokenizer": true } - ] + ], + "node_i": "10604", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10592", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10556", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Tuu.json b/data/Tuu.json index 47cd5c17b8c6adb3a9462e58df0e3575902608d1..63cf9fc059c16cb9cebe84a408c7a902d0d45b27 100644 --- a/data/Tuu.json +++ b/data/Tuu.json @@ -2,82 +2,102 @@ "name": "Tuu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "!Ui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "\u2021Ungkue", "iso_1_code": null, "iso_3_code": "gku", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10611", + "scripts": [], + "own_tokenizer": false }, { "name": "Seroa", "iso_1_code": null, "iso_3_code": "kqu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10612", + "scripts": [], + "own_tokenizer": false }, { "name": "N\u01c1ng", "iso_1_code": null, "iso_3_code": "ngh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10613", + "scripts": [], + "own_tokenizer": false }, { "name": "\u01c0Xam", "iso_1_code": null, "iso_3_code": "xam", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10614", + "scripts": [], + "own_tokenizer": false }, { "name": "\u01c1Xegwi", "iso_1_code": null, "iso_3_code": "xeg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10615", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10610", + "scripts": [], + "own_tokenizer": false }, { "name": "Taa-Lower Nossob", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Taa", "iso_1_code": null, "iso_3_code": "nmn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10617", + "scripts": [], + "own_tokenizer": false }, { "name": "Lower Nossob", "iso_1_code": null, "iso_3_code": "nsb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10618", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10616", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10609", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Unclassified.json b/data/Unclassified.json index 3f0609a8ddcbb295c4f5c3fc0757168db61b14f4..cbef3a0646a91ec036732aefb05fc995d7358241 100644 --- a/data/Unclassified.json +++ b/data/Unclassified.json @@ -2,448 +2,560 @@ "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Alsea", "iso_1_code": null, "iso_3_code": "aes", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10620", + "scripts": [], + "own_tokenizer": false }, { "name": "Aguano", "iso_1_code": null, "iso_3_code": "aga", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10621", + "scripts": [], + "own_tokenizer": false }, { "name": "Awishira", "iso_1_code": null, "iso_3_code": "ash", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10622", + "scripts": [], + "own_tokenizer": false }, { "name": "Agavotaguerra", "iso_1_code": null, "iso_3_code": "avo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10623", + "scripts": [], + "own_tokenizer": false }, { "name": "Ar\u00e1ra, Mato Grosso", "iso_1_code": null, "iso_3_code": "axg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10624", + "scripts": [], + "own_tokenizer": false }, { "name": "Bung", "iso_1_code": null, "iso_3_code": "bqd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10625", + "scripts": [], + "own_tokenizer": false }, { "name": "Beothuk", "iso_1_code": null, "iso_3_code": "bue", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10626", + "scripts": [], + "own_tokenizer": false }, { "name": "Carabayo", "iso_1_code": null, "iso_3_code": "cby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10627", + "scripts": [], + "own_tokenizer": false }, { "name": "Doso", "iso_1_code": null, "iso_3_code": "dol", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10628", + "scripts": [], + "own_tokenizer": false }, { "name": "Gail", "iso_1_code": null, "iso_3_code": "gic", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10629", + "scripts": [], + "own_tokenizer": false }, { "name": "Himarim\u00e3", "iso_1_code": null, "iso_3_code": "hir", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10630", + "scripts": [], + "own_tokenizer": false }, { "name": "Haitian Vodoun Culture Language", "iso_1_code": null, "iso_3_code": "hvc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10631", + "scripts": [], + "own_tokenizer": false }, { "name": "Kara", "iso_1_code": null, "iso_3_code": "kah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10632", + "scripts": [], + "own_tokenizer": false }, { "name": "Lufu", "iso_1_code": null, "iso_3_code": "ldq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10633", + "scripts": [], + "own_tokenizer": false }, { "name": "Lepki", "iso_1_code": null, "iso_3_code": "lpe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10634", + "scripts": [], + "own_tokenizer": false }, { "name": "Kasabe", "iso_1_code": null, "iso_3_code": "luw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10635", + "scripts": [], + "own_tokenizer": false }, { "name": "Majhwar", "iso_1_code": null, "iso_3_code": "mmj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10636", + "scripts": [], + "own_tokenizer": false }, { "name": "Mangue", "iso_1_code": null, "iso_3_code": "mom", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10637", + "scripts": [], + "own_tokenizer": false }, { "name": "Molof", "iso_1_code": null, "iso_3_code": "msl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10638", + "scripts": [], + "own_tokenizer": false }, { "name": "Namla", "iso_1_code": null, "iso_3_code": "naa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10639", + "scripts": [], + "own_tokenizer": false }, { "name": "Pankarar\u00e9", "iso_1_code": null, "iso_3_code": "pax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10640", + "scripts": [], + "own_tokenizer": false }, { "name": "Pijao", "iso_1_code": null, "iso_3_code": "pij", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10641", + "scripts": [], + "own_tokenizer": false }, { "name": "Polari", "iso_1_code": null, "iso_3_code": "pld", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10642", + "scripts": [], + "own_tokenizer": false }, { "name": "Mercheros", "iso_1_code": null, "iso_3_code": "quq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10643", + "scripts": [], + "own_tokenizer": false }, { "name": "Rer Bare", "iso_1_code": null, "iso_3_code": "rer", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10644", + "scripts": [], + "own_tokenizer": false }, { "name": "Murkim", "iso_1_code": null, "iso_3_code": "rmh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10645", + "scripts": [], + "own_tokenizer": false }, { "name": "Shabo", "iso_1_code": null, "iso_3_code": "sbf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10646", + "scripts": [], + "own_tokenizer": false }, { "name": "Kimki", "iso_1_code": null, "iso_3_code": "sbt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10647", + "scripts": [], + "own_tokenizer": false }, { "name": "Shanenawa", "iso_1_code": null, "iso_3_code": "swo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10648", + "scripts": [], + "own_tokenizer": false }, { "name": "Tingui-Boto", "iso_1_code": null, "iso_3_code": "tgv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10649", + "scripts": [], + "own_tokenizer": false }, { "name": "Truk\u00e1", "iso_1_code": null, "iso_3_code": "tka", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10650", + "scripts": [], + "own_tokenizer": false }, { "name": "Takelma", "iso_1_code": null, "iso_3_code": "tkm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10651", + "scripts": [], + "own_tokenizer": false }, { "name": "Tofanma", "iso_1_code": null, "iso_3_code": "tlg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10652", + "scripts": [], + "own_tokenizer": false }, { "name": "Trememb\u00e9", "iso_1_code": null, "iso_3_code": "tme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10653", + "scripts": [], + "own_tokenizer": false }, { "name": "Traveller Scottish", "iso_1_code": null, "iso_3_code": "trl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10654", + "scripts": [], + "own_tokenizer": false }, { "name": "Lule", "iso_1_code": null, "iso_3_code": "ule", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10655", + "scripts": [], + "own_tokenizer": false }, { "name": "Usku", "iso_1_code": null, "iso_3_code": "ulf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10656", + "scripts": [], + "own_tokenizer": false }, { "name": "Kujarge", "iso_1_code": null, "iso_3_code": "vkj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10657", + "scripts": [], + "own_tokenizer": false }, { "name": "Wakon\u00e1", "iso_1_code": null, "iso_3_code": "waf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10658", + "scripts": [], + "own_tokenizer": false }, { "name": "Weyto", "iso_1_code": null, "iso_3_code": "woy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10659", + "scripts": [], + "own_tokenizer": false }, { "name": "Wasu", "iso_1_code": null, "iso_3_code": "wsu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10660", + "scripts": [], + "own_tokenizer": false }, { "name": "Waxianghua", "iso_1_code": null, "iso_3_code": "wxa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10661", + "scripts": [], + "own_tokenizer": false }, { "name": "Adai", "iso_1_code": null, "iso_3_code": "xad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10662", + "scripts": [], + "own_tokenizer": false }, { "name": "Kaimb\u00e9", "iso_1_code": null, "iso_3_code": "xai", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10663", + "scripts": [], + "own_tokenizer": false }, { "name": "Kenaboi", "iso_1_code": null, "iso_3_code": "xbn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10664", + "scripts": [], + "own_tokenizer": false }, { "name": "Kambiw\u00e1", "iso_1_code": null, "iso_3_code": "xbw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10665", + "scripts": [], + "own_tokenizer": false }, { "name": "Kembra", "iso_1_code": null, "iso_3_code": "xkw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10666", + "scripts": [], + "own_tokenizer": false }, { "name": "Kapinaw\u00e1", "iso_1_code": null, "iso_3_code": "xpn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10667", + "scripts": [], + "own_tokenizer": false }, { "name": "Aranama-Tamique", "iso_1_code": null, "iso_3_code": "xrt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10668", + "scripts": [], + "own_tokenizer": false }, { "name": "Solano", "iso_1_code": null, "iso_3_code": "xso", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10669", + "scripts": [], + "own_tokenizer": false }, { "name": "Yitha Yitha", "iso_1_code": null, "iso_3_code": "xth", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10670", + "scripts": [], + "own_tokenizer": false }, { "name": "Tambora", "iso_1_code": null, "iso_3_code": "xxt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10671", + "scripts": [], + "own_tokenizer": false }, { "name": "Pum\u00e9", "iso_1_code": null, "iso_3_code": "yae", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10672", + "scripts": [], + "own_tokenizer": false }, { "name": "Yeni", "iso_1_code": null, "iso_3_code": "yei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10673", + "scripts": [], + "own_tokenizer": false }, { "name": "Yetfa", "iso_1_code": null, "iso_3_code": "yet", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10674", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10619", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Uralic.json b/data/Uralic.json index c5d412c4a6cd74419b41bcaf1330558af55efa20..b586265b13dcaa50eb3f1ed4a08f26094b717932 100644 --- a/data/Uralic.json +++ b/data/Uralic.json @@ -2,653 +2,1321 @@ "name": "Uralic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Hungarian", "iso_1_code": "hu", "iso_3_code": "hun", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10676", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Khanty", "iso_1_code": null, "iso_3_code": "kca", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10677", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Mansi", "iso_1_code": null, "iso_3_code": "mns", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10678", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Finnic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Estonian, Standard", "iso_1_code": "et", "iso_3_code": "ekk", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"et\")", + "original_lang_name": "estonian", + "original_lang_code": "est", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "10680", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Finnish", "iso_1_code": "fi", "iso_3_code": "fin", - "tokenizer": { - "name": "finnish", - "tokenizer": "SpaCyTokenizer(\"fi\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10681", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Me\u00e4nkieli", "iso_1_code": null, "iso_3_code": "fit", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10682", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Finnish, Kven", "iso_1_code": null, "iso_3_code": "fkv", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10683", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ingrian", "iso_1_code": null, "iso_3_code": "izh", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10684", + "scripts": [], + "own_tokenizer": false }, { "name": "Karelian", "iso_1_code": null, "iso_3_code": "krl", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10685", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Liv", "iso_1_code": null, "iso_3_code": "liv", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10686", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ludian", "iso_1_code": null, "iso_3_code": "lud", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10687", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Livvi-Karelian", "iso_1_code": null, "iso_3_code": "olo", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10688", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Veps", "iso_1_code": null, "iso_3_code": "vep", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10689", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Vod", "iso_1_code": null, "iso_3_code": "vot", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10690", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "V\u00f5ro", "iso_1_code": "et", "iso_3_code": "vro", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"et\")", + "original_lang_name": "estonian", + "original_lang_code": "est", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": true + } }, - "source": "macrolanguage", - "children": [] + "children": [], + "node_i": "10691", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Krevinian", "iso_1_code": null, "iso_3_code": "zkv", - "tokenizer": { - "name": "estonian", - "tokenizer": "SpaCyTokenizer(\"et\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10692", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10679", + "scripts": [], + "own_tokenizer": false }, { "name": "Mari", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mari, Meadow", "iso_1_code": null, "iso_3_code": "mhr", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10694", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Mari, Hill", "iso_1_code": null, "iso_3_code": "mrj", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10695", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10693", + "scripts": [], + "own_tokenizer": false }, { "name": "Mordvin", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "erzya", - "tokenizer": "StanzaTokenizer(\"myv\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Moksha", "iso_1_code": null, "iso_3_code": "mdf", - "tokenizer": { - "name": "erzya", - "tokenizer": "StanzaTokenizer(\"myv\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10697", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Erzya", "iso_1_code": null, "iso_3_code": "myv", - "tokenizer": { - "name": "erzya", - "tokenizer": "StanzaTokenizer(\"myv\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10698", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": true } - ] + ], + "node_i": "10696", + "scripts": [], + "own_tokenizer": false }, { "name": "Permian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Udmurt", "iso_1_code": null, "iso_3_code": "udm", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10700", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Komi", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Komi-Permyak", "iso_1_code": "kv", "iso_3_code": "koi", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10702", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Komi-Zyrian", "iso_1_code": "kv", "iso_3_code": "kpv", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10703", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10701", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10699", + "scripts": [], + "own_tokenizer": false }, { "name": "Sami", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Saami, Akkala", "iso_1_code": null, "iso_3_code": "sia", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10706", + "scripts": [], + "own_tokenizer": false }, { "name": "Saami, Kildin", "iso_1_code": null, "iso_3_code": "sjd", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10707", + "scripts": [], + "own_tokenizer": false }, { "name": "Saami, Kemi", "iso_1_code": null, "iso_3_code": "sjk", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10708", + "scripts": [], + "own_tokenizer": false }, { "name": "Saami, Ter", "iso_1_code": null, "iso_3_code": "sjt", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10709", + "scripts": [], + "own_tokenizer": false }, { "name": "Saami, Inari", "iso_1_code": null, "iso_3_code": "smn", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10710", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Saami, Skolt", "iso_1_code": null, "iso_3_code": "sms", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10711", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10705", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Saami, Ume", "iso_1_code": null, "iso_3_code": "sju", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10713", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10712", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "bottom", "children": [ { "name": "Saami, Pite", "iso_1_code": null, "iso_3_code": "sje", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10716", + "scripts": [], + "own_tokenizer": false }, { "name": "Saami, North", "iso_1_code": "se", "iso_3_code": "sme", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "own", - "children": [] + "children": [], + "node_i": "10717", + "scripts": [ + "Latn" + ], + "own_tokenizer": true }, { "name": "Saami, Lule", "iso_1_code": null, "iso_3_code": "smj", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10718", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10715", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Saami, South", "iso_1_code": null, "iso_3_code": "sma", - "tokenizer": { - "name": "northern_sami", - "tokenizer": "StanzaTokenizer(\"se\")" + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "scripts": [ + "Latn" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10720", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10719", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10714", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10704", + "scripts": [], + "own_tokenizer": false }, { "name": "Samoyed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Mator", "iso_1_code": null, "iso_3_code": "mtm", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10722", + "scripts": [], + "own_tokenizer": false }, { "name": "Northern Samoyed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Nganasan", "iso_1_code": null, "iso_3_code": "nio", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10724", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Nenets", "iso_1_code": null, "iso_3_code": "yrk", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10725", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Enets", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Enets, Forest", "iso_1_code": null, "iso_3_code": "enf", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10727", + "scripts": [], + "own_tokenizer": false }, { "name": "Enets, Tundra", "iso_1_code": null, "iso_3_code": "enh", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10728", + "scripts": [], + "own_tokenizer": false }, { "name": "Yurats", "iso_1_code": null, "iso_3_code": "rts", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10729", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10726", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10723", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Samoyed", "iso_1_code": null, "iso_3_code": null, - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "scripts": [ + "Latn" + ], + "class_name": "SpaCyTokenizer", + "macrolanguage": false + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", "children": [ { "name": "Selkup", "iso_1_code": null, "iso_3_code": "sel", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "scripts": [ + "Cyrl" + ], + "class_name": "StanzaTokenizer", + "macrolanguage": false + } }, - "source": "down", - "children": [] + "children": [], + "node_i": "10731", + "scripts": [ + "Cyrl" + ], + "own_tokenizer": false }, { "name": "Kamas", "iso_1_code": null, "iso_3_code": "xas", - "tokenizer": { - "name": "hungarian", - "tokenizer": "SpaCyTokenizer(\"hu\")" - }, - "source": "down", - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10732", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10730", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10721", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10675", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Uto-Aztecan.json b/data/Uto-Aztecan.json index c75976361b1bbfbb6abfe1e5b5de3179d50d4ddd..6b69db2375fcbb9e9e30d1a82af5ab02d3982d21 100644 --- a/data/Uto-Aztecan.json +++ b/data/Uto-Aztecan.json @@ -2,700 +2,928 @@ "name": "Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Hopi", "iso_1_code": null, "iso_3_code": "hop", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10735", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "T\u00fcbatulabal", "iso_1_code": null, "iso_3_code": "tub", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10736", + "scripts": [], + "own_tokenizer": false }, { "name": "Numic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Comanche", "iso_1_code": null, "iso_3_code": "com", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10739", + "scripts": [], + "own_tokenizer": false }, { "name": "Timbisha", "iso_1_code": null, "iso_3_code": "par", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10740", + "scripts": [], + "own_tokenizer": false }, { "name": "Shoshoni", "iso_1_code": null, "iso_3_code": "shh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10741", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10738", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ute-Southern Paiute", "iso_1_code": null, "iso_3_code": "ute", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10743", + "scripts": [], + "own_tokenizer": false }, { "name": "Kawaiisu", "iso_1_code": null, "iso_3_code": "xaw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10744", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10742", + "scripts": [], + "own_tokenizer": false }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mono", "iso_1_code": null, "iso_3_code": "mnr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10746", + "scripts": [], + "own_tokenizer": false }, { "name": "Paiute, Northern", "iso_1_code": null, "iso_3_code": "pao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10747", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10745", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10737", + "scripts": [], + "own_tokenizer": false }, { "name": "Takic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Luise\u00f1o", "iso_1_code": null, "iso_3_code": "lui", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10749", + "scripts": [], + "own_tokenizer": false }, { "name": "Serrano", "iso_1_code": null, "iso_3_code": "ser", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10750", + "scripts": [], + "own_tokenizer": false }, { "name": "Gabrielino-Fernande\u00f1o", "iso_1_code": null, "iso_3_code": "xgf", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10751", + "scripts": [], + "own_tokenizer": false }, { "name": "Cahuilla", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cahuilla", "iso_1_code": null, "iso_3_code": "chl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10753", + "scripts": [], + "own_tokenizer": false }, { "name": "Cupe\u00f1o", "iso_1_code": null, "iso_3_code": "cup", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10754", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10752", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10748", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10734", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Corachol-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Pochutec", "iso_1_code": null, "iso_3_code": "xpo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10757", + "scripts": [], + "own_tokenizer": false }, { "name": "Cora-Huichol", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huichol", "iso_1_code": null, "iso_3_code": "hch", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10759", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cora", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cora, Santa Teresa", "iso_1_code": null, "iso_3_code": "cok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10761", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Cora, El Nayar", "iso_1_code": null, "iso_3_code": "crn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10762", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10760", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10758", + "scripts": [], + "own_tokenizer": false }, { "name": "Core Nahua", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nahuat", "iso_1_code": null, "iso_3_code": "ppl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10764", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nahuatl, Eastern Durango", "iso_1_code": null, "iso_3_code": "azd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10766", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Western Durango", "iso_1_code": null, "iso_3_code": "azn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10767", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Highland Puebla", "iso_1_code": null, "iso_3_code": "azz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10768", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Coatepec", "iso_1_code": null, "iso_3_code": "naz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10769", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Central Huasteca", "iso_1_code": null, "iso_3_code": "nch", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10770", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Northern Puebla", "iso_1_code": null, "iso_3_code": "ncj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10771", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Michoac\u00e1n", "iso_1_code": null, "iso_3_code": "ncl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10772", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Central Puebla", "iso_1_code": null, "iso_3_code": "ncx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10773", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Guerrero", "iso_1_code": null, "iso_3_code": "ngu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10774", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Tabasco", "iso_1_code": null, "iso_3_code": "nhc", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10775", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Eastern Huasteca", "iso_1_code": null, "iso_3_code": "nhe", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10776", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Tetelcingo", "iso_1_code": null, "iso_3_code": "nhg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10777", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Zacatl\u00e1n-Ahuacatl\u00e1n-Tepetzintla", "iso_1_code": null, "iso_3_code": "nhi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10778", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Isthmus-Cosoleacaque", "iso_1_code": null, "iso_3_code": "nhk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10779", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Morelos", "iso_1_code": null, "iso_3_code": "nhm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10780", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Central", "iso_1_code": null, "iso_3_code": "nhn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10781", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Isthmus-Pajapan", "iso_1_code": null, "iso_3_code": "nhp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10782", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Huaxcaleca", "iso_1_code": null, "iso_3_code": "nhq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10783", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Ometepec", "iso_1_code": null, "iso_3_code": "nht", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10784", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Temascaltepec", "iso_1_code": null, "iso_3_code": "nhv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10785", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Western Huasteca", "iso_1_code": null, "iso_3_code": "nhw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10786", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Isthmus-Mecayapan", "iso_1_code": null, "iso_3_code": "nhx", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10787", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Northern Oaxaca", "iso_1_code": null, "iso_3_code": "nhy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10788", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Santa Mar\u00eda la Alta", "iso_1_code": null, "iso_3_code": "nhz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10789", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Orizaba", "iso_1_code": null, "iso_3_code": "nlv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10790", + "scripts": [], + "own_tokenizer": false }, { "name": "Nahuatl, Southeastern Puebla", "iso_1_code": null, "iso_3_code": "npl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10791", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Sierra Negra", "iso_1_code": null, "iso_3_code": "nsu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10792", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Nahuatl, Tlamacazapa", "iso_1_code": null, "iso_3_code": "nuz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10793", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10765", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10763", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10756", + "scripts": [], + "own_tokenizer": false }, { "name": "Pimic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tohono O\u2019odham", "iso_1_code": null, "iso_3_code": "ood", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10795", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Pima Bajo", "iso_1_code": null, "iso_3_code": "pia", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10796", + "scripts": [], + "own_tokenizer": false }, { "name": "Tepecano", "iso_1_code": null, "iso_3_code": "tep", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10797", + "scripts": [], + "own_tokenizer": false }, { "name": "Tepehuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tepehuan, Northern", "iso_1_code": null, "iso_3_code": "ntp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10799", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tepehuan, Southeastern", "iso_1_code": null, "iso_3_code": "stp", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10800", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tepehuan, Southwestern", "iso_1_code": null, "iso_3_code": "tla", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10801", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10798", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10794", + "scripts": [], + "own_tokenizer": false }, { "name": "Taracahitic", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tubar", "iso_1_code": null, "iso_3_code": "tbu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10803", + "scripts": [], + "own_tokenizer": false }, { "name": "Cahitan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Mayo", "iso_1_code": null, "iso_3_code": "mfy", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10805", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaqui", "iso_1_code": null, "iso_3_code": "yaq", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10806", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10804", + "scripts": [], + "own_tokenizer": false }, { "name": "Opatan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Eudeve", "iso_1_code": null, "iso_3_code": "eud", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10808", + "scripts": [], + "own_tokenizer": false }, { "name": "Opata", "iso_1_code": null, "iso_3_code": "opt", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10809", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10807", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarahumaran", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Huarij\u00edo", "iso_1_code": null, "iso_3_code": "var", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10811", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tarahumara", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tarahumara, Western", "iso_1_code": null, "iso_3_code": "tac", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10813", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tarahumara, Central", "iso_1_code": null, "iso_3_code": "tar", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10814", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tarahumara, Southeastern", "iso_1_code": null, "iso_3_code": "tcu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10815", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarahumara, Northern", "iso_1_code": null, "iso_3_code": "thh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10816", + "scripts": [], + "own_tokenizer": false }, { "name": "Tarahumara, Southwestern", "iso_1_code": null, "iso_3_code": "twr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10817", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10812", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10810", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10802", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10755", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10733", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Wakashan.json b/data/Wakashan.json index 6bad9592c1ff64f7de024c9416108b80d55d5ec0..af4495199987dbec9527b7420996caa6296aa158 100644 --- a/data/Wakashan.json +++ b/data/Wakashan.json @@ -2,83 +2,103 @@ "name": "Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Northern Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Haisla", "iso_1_code": null, "iso_3_code": "has", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10820", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwakiutlan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Heiltsuk", "iso_1_code": null, "iso_3_code": "hei", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10822", + "scripts": [], + "own_tokenizer": false }, { "name": "Kwakwala", "iso_1_code": null, "iso_3_code": "kwk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10823", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10821", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10819", + "scripts": [], + "own_tokenizer": false }, { "name": "Southern Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ditidaht", "iso_1_code": null, "iso_3_code": "dtd", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10825", + "scripts": [], + "own_tokenizer": false }, { "name": "Makah", "iso_1_code": null, "iso_3_code": "myh", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10826", + "scripts": [], + "own_tokenizer": false }, { "name": "Nuu-chah-nulth", "iso_1_code": null, "iso_3_code": "nuk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10827", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10824", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10818", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/West Papuan.json b/data/West Papuan.json index 58f135915a664a7cf0376a60d693f53a481e3ad1..5b179336d491a62edbba371e08d2ab147be6aa82 100644 --- a/data/West Papuan.json +++ b/data/West Papuan.json @@ -2,255 +2,327 @@ "name": "West Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "North Halmahera", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Galela-Loloda", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Galela", "iso_1_code": null, "iso_3_code": "gbi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10831", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Laba", "iso_1_code": null, "iso_3_code": "lau", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10832", + "scripts": [], + "own_tokenizer": false }, { "name": "Loloda", "iso_1_code": null, "iso_3_code": "loa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10833", + "scripts": [], + "own_tokenizer": false }, { "name": "Modole", "iso_1_code": null, "iso_3_code": "mqo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10834", + "scripts": [], + "own_tokenizer": false }, { "name": "Pagu", "iso_1_code": null, "iso_3_code": "pgu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10835", + "scripts": [], + "own_tokenizer": false }, { "name": "Tabaru", "iso_1_code": null, "iso_3_code": "tby", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10836", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tobelo", "iso_1_code": null, "iso_3_code": "tlb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10837", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Tugutil", "iso_1_code": null, "iso_3_code": "tuj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10838", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10830", + "scripts": [], + "own_tokenizer": false }, { "name": "Sahu", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Gamkonora", "iso_1_code": null, "iso_3_code": "gak", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10840", + "scripts": [], + "own_tokenizer": false }, { "name": "Ibu", "iso_1_code": null, "iso_3_code": "ibu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10841", + "scripts": [], + "own_tokenizer": false }, { "name": "Kao", "iso_1_code": null, "iso_3_code": "kax", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10842", + "scripts": [], + "own_tokenizer": false }, { "name": "Sahu", "iso_1_code": null, "iso_3_code": "saj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10843", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Waioli", "iso_1_code": null, "iso_3_code": "wli", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10844", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10839", + "scripts": [], + "own_tokenizer": false }, { "name": "Ternate-Tidore", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ternate", "iso_1_code": null, "iso_3_code": "tft", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10846", + "scripts": [], + "own_tokenizer": false }, { "name": "Tidore", "iso_1_code": null, "iso_3_code": "tvo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10847", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10845", + "scripts": [], + "own_tokenizer": false }, { "name": "West Makian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Makian, West", "iso_1_code": null, "iso_3_code": "mqs", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10849", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10848", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10829", + "scripts": [], + "own_tokenizer": false }, { "name": "West Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Tehit", "iso_1_code": null, "iso_3_code": "kps", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10851", + "scripts": [], + "own_tokenizer": false }, { "name": "Kalabra", "iso_1_code": null, "iso_3_code": "kzz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10852", + "scripts": [], + "own_tokenizer": false }, { "name": "Moraid", "iso_1_code": null, "iso_3_code": "msg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10853", + "scripts": [], + "own_tokenizer": false }, { "name": "Moi Kelim", "iso_1_code": null, "iso_3_code": "mxn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10854", + "scripts": [], + "own_tokenizer": false }, { "name": "Moi Lemas", "iso_1_code": null, "iso_3_code": "sbg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10855", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10850", + "scripts": [], + "own_tokenizer": false }, { "name": "Yapen", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Saweru", "iso_1_code": null, "iso_3_code": "swr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10857", + "scripts": [], + "own_tokenizer": false }, { "name": "Yawa", "iso_1_code": null, "iso_3_code": "yva", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10858", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10856", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10828", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Wintuan.json b/data/Wintuan.json index 94150961d96aa9edd47bf618ab1972aa5600ab7e..7a27d1c24580fa9d7203d007a5748cbbd5e41a1b 100644 --- a/data/Wintuan.json +++ b/data/Wintuan.json @@ -2,32 +2,40 @@ "name": "Wintuan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nomlaki", "iso_1_code": null, "iso_3_code": "nol", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10860", + "scripts": [], + "own_tokenizer": false }, { "name": "Patwin", "iso_1_code": null, "iso_3_code": "pwi", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10861", + "scripts": [], + "own_tokenizer": false }, { "name": "Wintu", "iso_1_code": null, "iso_3_code": "wnw", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10862", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10859", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Witotoan.json b/data/Witotoan.json index 3aacf1cf43b06db02c6d598e81135d40ef277b8a..4f4bbad69f622eda65ab62f5bd71cf95a8ff9ac3 100644 --- a/data/Witotoan.json +++ b/data/Witotoan.json @@ -2,100 +2,132 @@ "name": "Witotoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Nonuya", "iso_1_code": null, "iso_3_code": "noj", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10864", + "scripts": [], + "own_tokenizer": false }, { "name": "Proto-Bora-Muinane", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Muinane", "iso_1_code": null, "iso_3_code": "bmr", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10866", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Bora", "iso_1_code": null, "iso_3_code": "boa", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10867", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10865", + "scripts": [], + "own_tokenizer": false }, { "name": "Proto-Huitoto-Ocaina", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ocaina", "iso_1_code": null, "iso_3_code": "oca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10869", + "scripts": [], + "own_tokenizer": false }, { "name": "Early Huitoto", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Witoto, Nipode", "iso_1_code": null, "iso_3_code": "hux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10871", + "scripts": [], + "own_tokenizer": false }, { "name": "Proto-Minica-Murui", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Witoto, Minika", "iso_1_code": null, "iso_3_code": "hto", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10873", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Witoto, Murui", "iso_1_code": null, "iso_3_code": "huu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10874", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10872", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10870", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10868", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10863", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yaguan.json b/data/Yaguan.json index fef899f2400881d4966327b75e7d7b46d5c7b46e..b9b114cdd19a041ab077cba42724e8e033264d2f 100644 --- a/data/Yaguan.json +++ b/data/Yaguan.json @@ -2,24 +2,32 @@ "name": "Yaguan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yagua", "iso_1_code": null, "iso_3_code": "yad", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10876", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yameo", "iso_1_code": null, "iso_3_code": "yme", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10877", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10875", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yanomaman.json b/data/Yanomaman.json index d3c47b0bf9a651154974b8d37f75a66349732493..2b042f7186841fd485795277a3185d807f28bd6c 100644 --- a/data/Yanomaman.json +++ b/data/Yanomaman.json @@ -2,48 +2,64 @@ "name": "Yanomaman", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yanomam\u00f6", "iso_1_code": null, "iso_3_code": "guu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10879", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Ninam", "iso_1_code": null, "iso_3_code": "shb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10880", + "scripts": [], + "own_tokenizer": false }, { "name": "Yanom\u00e1mi", "iso_1_code": null, "iso_3_code": "wca", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10881", + "scripts": [], + "own_tokenizer": false }, { "name": "Sanum\u00e1", "iso_1_code": null, "iso_3_code": "xsu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10882", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Yaroam\u00eb", "iso_1_code": null, "iso_3_code": "yro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10883", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10878", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yele-West New Britain.json b/data/Yele-West New Britain.json index 9f6644977ef6d478ee7ef815ace4111153b9294d..c97b947b0cfc06ea06cb99bd0b77eca300ae7331 100644 --- a/data/Yele-West New Britain.json +++ b/data/Yele-West New Britain.json @@ -2,50 +2,66 @@ "name": "Yele-West New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "West New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Anem", "iso_1_code": null, "iso_3_code": "anz", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10886", + "scripts": [], + "own_tokenizer": false }, { "name": "Pele-Ata", "iso_1_code": null, "iso_3_code": "ata", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10887", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10885", + "scripts": [], + "own_tokenizer": false }, { "name": "Yele", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Y\u00e9l\u00ee Dnye", "iso_1_code": null, "iso_3_code": "yle", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10889", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10888", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10884", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yeniseian.json b/data/Yeniseian.json index 1a5c4b188ea839154dbaa0628af8442ca00cf35d..c76e57d1bf55e27e17509775b7e26bb76d4c0fa0 100644 --- a/data/Yeniseian.json +++ b/data/Yeniseian.json @@ -2,48 +2,60 @@ "name": "Yeniseian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ket", "iso_1_code": null, "iso_3_code": "ket", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10891", + "scripts": [], + "own_tokenizer": false }, { "name": "Pumpokol", "iso_1_code": null, "iso_3_code": "xpm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10892", + "scripts": [], + "own_tokenizer": false }, { "name": "Arin", "iso_1_code": null, "iso_3_code": "xrn", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10893", + "scripts": [], + "own_tokenizer": false }, { "name": "Yug", "iso_1_code": null, "iso_3_code": "yug", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10894", + "scripts": [], + "own_tokenizer": false }, { "name": "Kott", "iso_1_code": null, "iso_3_code": "zko", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10895", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10890", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yokutsan.json b/data/Yokutsan.json index ee2023dc022bffe41eaf50e02b85d5a726482aa0..382c5b1c28969c779c1165c15dce9af31ac7abf0 100644 --- a/data/Yokutsan.json +++ b/data/Yokutsan.json @@ -2,16 +2,20 @@ "name": "Yokutsan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yokuts", "iso_1_code": null, "iso_3_code": "yok", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10897", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10896", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yuat.json b/data/Yuat.json index d58f4afa806ca04f37c374cba1f69b370611f708..e9e3877bd3707875a757ae3559713a271b0c7655 100644 --- a/data/Yuat.json +++ b/data/Yuat.json @@ -2,48 +2,62 @@ "name": "Yuat", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Bun", "iso_1_code": null, "iso_3_code": "buv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10899", + "scripts": [], + "own_tokenizer": false }, { "name": "Biwat", "iso_1_code": null, "iso_3_code": "bwm", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10900", + "scripts": [], + "own_tokenizer": false }, { "name": "Changriwa", "iso_1_code": null, "iso_3_code": "cga", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10901", + "scripts": [], + "own_tokenizer": false }, { "name": "Kyenele", "iso_1_code": null, "iso_3_code": "kql", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10902", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Mekmek", "iso_1_code": null, "iso_3_code": "mvk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10903", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10898", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yukaghir.json b/data/Yukaghir.json index 7edbcd2f6471aa066e0c35339e1997258093be6b..e0250e64b6c1b7e3f58f8068f6ed1f9d5e390712 100644 --- a/data/Yukaghir.json +++ b/data/Yukaghir.json @@ -2,40 +2,50 @@ "name": "Yukaghir", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Omok", "iso_1_code": null, "iso_3_code": "omk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10905", + "scripts": [], + "own_tokenizer": false }, { "name": "Chuvantsy", "iso_1_code": null, "iso_3_code": "xcv", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10906", + "scripts": [], + "own_tokenizer": false }, { "name": "Yukaghir, Northern", "iso_1_code": null, "iso_3_code": "ykg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10907", + "scripts": [], + "own_tokenizer": false }, { "name": "Yukaghir, Southern", "iso_1_code": null, "iso_3_code": "yux", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10908", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10904", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Yukian.json b/data/Yukian.json index 4a75555815b235c7f75c27a7c62067ceac76ba9e..4cb4ee183891c2787d321bf3958f9569dd482dcf 100644 --- a/data/Yukian.json +++ b/data/Yukian.json @@ -2,33 +2,41 @@ "name": "Yukian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Wappo", "iso_1_code": null, "iso_3_code": "wao", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10910", + "scripts": [], + "own_tokenizer": false }, { "name": "Core Yukian", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Yuki", "iso_1_code": null, "iso_3_code": "yuk", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10912", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10911", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10909", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Zamucoan.json b/data/Zamucoan.json index 6f28f5db1c0033dfb87213a054c10aac1e01617b..28e2da0ff3eea00f9d7c590fff78b675af6fb471 100644 --- a/data/Zamucoan.json +++ b/data/Zamucoan.json @@ -2,24 +2,34 @@ "name": "Zamucoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Ayoreo", "iso_1_code": null, "iso_3_code": "ayo", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10914", + "scripts": [ + "Latn" + ], + "own_tokenizer": false }, { "name": "Chamacoco", "iso_1_code": null, "iso_3_code": "ceg", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10915", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10913", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/data/Zaparoan.json b/data/Zaparoan.json index c174221000cafff838922cbb01cc29a0b6f046f3..e7a5fac1c71d59304a60536756b77227a57c90a7 100644 --- a/data/Zaparoan.json +++ b/data/Zaparoan.json @@ -2,75 +2,95 @@ "name": "Zaparoan", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Iquito-Cahuarano", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Cahuarano", "iso_1_code": null, "iso_3_code": "cah", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10918", + "scripts": [], + "own_tokenizer": false }, { "name": "Iquitu", "iso_1_code": null, "iso_3_code": "iqu", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10919", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10917", + "scripts": [], + "own_tokenizer": false }, { "name": "Z\u00e1paro", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Z\u00e1paro", "iso_1_code": null, "iso_3_code": "zro", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10921", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabela-Andoa", "iso_1_code": null, "iso_3_code": null, - "tokenizer": null, - "source": null, + "tokenizers": {}, "children": [ { "name": "Andoa", "iso_1_code": null, "iso_3_code": "anb", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10923", + "scripts": [], + "own_tokenizer": false }, { "name": "Arabela", "iso_1_code": null, "iso_3_code": "arl", - "tokenizer": null, - "source": null, - "children": [] + "tokenizers": {}, + "children": [], + "node_i": "10924", + "scripts": [ + "Latn" + ], + "own_tokenizer": false } - ] + ], + "node_i": "10922", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10920", + "scripts": [], + "own_tokenizer": false } - ] + ], + "node_i": "10916", + "scripts": [], + "own_tokenizer": false } \ No newline at end of file diff --git a/index.html b/index.html index 642425200ad459d607555ea8c7649df1f2ac0dc8..21988772ea1838203e7fa07c969f919a716a296d 100644 --- a/index.html +++ b/index.html @@ -172,7 +172,12 @@ -
+ + +
+
+
+
diff --git a/script.js b/script.js index afb2c0b5ae2b59c1f556d478afd90e99cab03f82..95405dd04546e15de8c929271eea01f7a1d5eec2 100644 --- a/script.js +++ b/script.js @@ -4,13 +4,17 @@ document.addEventListener("DOMContentLoaded", function() { const searchBar = document.getElementById('search-bar'); const depthSelector = document.getElementById('depth-number'); const tokenizerFilterDiv = document.getElementById('tokenizer-filter'); + const scriptFiltersDiv = document.getElementById('script-filter'); + const showNonGlotlidCheckbox = document.getElementById('show-non-glotlid'); const clearTokenizersFilter = document.getElementById('clear-tokenizers'); + const clearScriptsFilter = document.getElementById('clear-scripts'); let nodeToCenter = null; let currentTreeData = null; let expandedNodes; let currentTransform = d3.zoomIdentity; // Save the current zoom and transform // Get unique tokenizer names let tokenizerNames; + let scriptNames; let color; function setExpanded(id, state = true){ @@ -50,6 +54,7 @@ document.addEventListener("DOMContentLoaded", function() { currentTreeData = addNodeIds(data); expandedNodes = new Set([0]); updateTokenizerFilter(data); + updateScriptFilter(data); drawVisibleNodes(true); }); } @@ -58,10 +63,47 @@ document.addEventListener("DOMContentLoaded", function() { searchBar.addEventListener('input', () => searchNode(searchBar.value)); depthSelector.addEventListener('change', (_) => drawVisibleNodes()); + showNonGlotlidCheckbox.addEventListener('click', (_) => drawVisibleNodes()); clearTokenizersFilter.addEventListener('click', (_) => { tokenizerFilterDiv.querySelectorAll('input:checked').forEach((a) => a.checked = false); drawVisibleNodes(); }) + clearScriptsFilter.addEventListener('click', (_) => { + scriptFiltersDiv.querySelectorAll('input:checked').forEach((a) => a.checked = false); + drawVisibleNodes(); + }) + + function getScriptNames(node, namesSet = new Set()) { + if (!node) + return namesSet; + if (node.scripts.length > 0) { + for(const script of node.scripts) + namesSet.add(script); + } else + namesSet.add('x'); + if (node.children) { + node.children.forEach(child => getScriptNames(child, namesSet)); + } + return namesSet; + } + + function updateScriptFilter(data) { + scriptNames = Array.from(getScriptNames(data)); + scriptFiltersDiv.innerHTML = ''; + scriptNames.forEach(name => { + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.value = name; + checkbox.checked = true; + checkbox.addEventListener('change', () => drawVisibleNodes()); + const label = document.createElement('label'); + label.appendChild(checkbox); + label.appendChild(document.createTextNode(name)); + + scriptFiltersDiv.appendChild(label); + scriptFiltersDiv.appendChild(document.createElement('br')); + }); + } function updateTokenizerFilter(data) { tokenizerNames = Array.from(getTokenizerNames(data)); @@ -95,8 +137,9 @@ document.addEventListener("DOMContentLoaded", function() { function getTokenizerNames(node, namesSet = new Set()) { if (!node) return namesSet; - if (node.tokenizer && node.tokenizer.name) { - namesSet.add(node.tokenizer.name); + if (node.tokenizers) { + for(const [script, tokenizer] of Object.entries(node.tokenizers)) + namesSet.add(tokenizer.original_lang_name); } else namesSet.add('x') if (node.children) { @@ -160,29 +203,40 @@ document.addEventListener("DOMContentLoaded", function() { .domain([1, root.data.subtreeSize]) .range([5, 20]); // Adjust the range as needed for minimum and maximum circle + + const selectedTokenizers = Array.from(tokenizerFilterDiv.querySelectorAll('input:checked')) + .map(input => input.value); + const selectedScripts = Array.from(scriptFiltersDiv.querySelectorAll('input:checked')) + .map(input => input.value); + function getColorTokenizer(node) { + // debugger; + const toks = Object.entries(node.tokenizers).filter(([script, obj]) => selectedScripts.includes(script) && selectedTokenizers.includes(obj.original_lang_name)); + return toks.length > 0 ? toks[0][1].original_lang_name : 'unknown'; + } + // Add circles to nodes node.append('circle') .filter(d => !d.data.iso_3_code) .attr('r', d => sizeScale(d.data.subtreeSize)) .attr('fill', d => { - const tokenizerName = d.data.tokenizer?.name || "unknown"; + const tokenizerName = getColorTokenizer(d.data); return color(tokenizerName); }); - node.filter(d => d.data.iso_3_code && d.data.source !== 'own' && d.data.source !== 'macrolanguage') // Select leaf nodes + node.filter(d => d.data.iso_3_code && !d.data.own_tokenizer) // Select leaf nodes .append('rect') .attr('width', 10) .attr('height', 10) .attr('x', -5) .attr('y', -5) .attr('fill', d => { - const tokenizerName = d.data.tokenizer?.name || "unknown"; + const tokenizerName = getColorTokenizer(d.data); return color(tokenizerName); }); - node.filter(d => d.data.source === 'own' || d.data.source === 'macrolanguage') // Select leaf nodes with "own" assignment + node.filter(d => d.data.own_tokenizer) // Select leaf nodes with "own" assignment .append('path') .attr('d', d3.symbol().type(d3.symbolTriangle).size(100)) // Adjust size as needed .attr('fill', d => { - const tokenizerName = d.data.tokenizer?.name || "unknown"; + const tokenizerName = getColorTokenizer(d.data); return color(tokenizerName); }); @@ -191,7 +245,10 @@ document.addEventListener("DOMContentLoaded", function() { .attr('dy', 4) .attr('x', d => sizeScale(d.data.subtreeSize) + 4) .attr('text-anchor', 'start') - .text(d => `${d.data.name} - ${d.data.tokenizer?.name || 'x'}${d.data.iso_3_code ? '' : ' (' + d.data.subtreeSize + ')'}`); + .text(d => { + const tokenizerName = getColorTokenizer(d.data); + return `${d.data.name} - ${tokenizerName || 'x'}${d.data.iso_3_code ? '' : ' (' + d.data.subtreeSize + ')'}` + }); if (!currentTransform || recenter) currentTransform = d3.zoomIdentity.translate(width / 2, height / 2); @@ -216,12 +273,16 @@ document.addEventListener("DOMContentLoaded", function() { hoverBox.style.display = "block"; hoverBox.style.left = (event.pageX) + "px"; hoverBox.style.top = (event.pageY) + "px"; + const tokenizersList = Object.keys(d.data.tokenizers).map((script) => `
  • ${script}: ${d.data.tokenizers[script]['class_name']}-${d.data.tokenizers[script]['original_lang_name']}
  • `).join('') hoverBox.innerHTML = ` Name: ${d.data.name}
    - ISO 1 Code: ${d.data.iso_1_code}
    - ISO 3 Code: ${d.data.iso_3_code}
    - Tokenizer: ${d.data.tokenizer?.tokenizer} (${d.data.tokenizer?.name})
    - Tokenizer assignment: ${d.data.source}
    + ${d.data.iso_1_code ? 'ISO 1 Code: ' + d.data.iso_1_code + '
    ' : ''} + ${d.data.iso_3_code ? 'ISO 3 Code: ' + d.data.iso_3_code + '
    ' : ''} + ${d.data.scripts.length > 0 ? 'Scripts: ' + d.data.scripts.join(', ') + '
    ' : ''} + ${d.data.iso_3_code ? 'In GlotLID: ' + (d.data.scripts.length > 0 ? 'YES' : 'NO') + '
    ' : ''} + + Tokenizers: + Tokenizer assignment: ${d.data.own_tokenizer ? 'Supported lang' : 'Auto-assigned'}
    Subtree size: ${d.data.subtreeSize} `; }).on("mousemove", function(event) { @@ -244,10 +305,13 @@ document.addEventListener("DOMContentLoaded", function() { // Get selected tokenizers const selectedTokenizers = Array.from(tokenizerFilterDiv.querySelectorAll('input:checked')) .map(input => input.value); + const selectedScripts = Array.from(scriptFiltersDiv.querySelectorAll('input:checked')) + .map(input => input.value); + const showNonGlotlid = showNonGlotlidCheckbox.checked; // Create a new root node containing only selected tokenizers and their ancestors function filterHierarchy(node, parentExpanded = true, depth = 0) { - if (!parentExpanded && !(depthSelector.value == 0 || depth <= depthSelector.value)) { + if ((node.iso_3_code && node.scripts.length === 0 && !showNonGlotlid) || (!parentExpanded && !(depthSelector.value == 0 || depth <= depthSelector.value))) { return null; } @@ -255,7 +319,8 @@ document.addEventListener("DOMContentLoaded", function() { .map(child => filterHierarchy(child, expandedNodes.has(node.id), depth + 1)) .filter(child => child !== null); - if (!parentExpanded && filteredChildren.length === 0 && (!selectedTokenizers.includes(node.tokenizer ? node.tokenizer.name : 'x')) && node.id != 0) + const shouldBeShown = selectedTokenizers.some(tok => (node.tokenizers ? Object.values(node.tokenizers).map(x => x.original_lang_name) : ['x']).includes(tok)) && selectedScripts.some(scr => (node.scripts.length ? node.scripts : ['x']).includes(scr) && (node.scripts.length > 0 || showNonGlotlid)); // if one of the leaves has this tokenizer and the selected scripts + if (!parentExpanded && filteredChildren.length === 0 && !(node.children.length === 0 && shouldBeShown) && node.id != 0) return null; return { diff --git a/style.css b/style.css index 411bda5c7afe76c31132f3c8a4945a03c110d1f4..50f9aebbc7adda53f63681ecf0e172461a462026 100644 --- a/style.css +++ b/style.css @@ -25,17 +25,19 @@ body { height: 100vh; overflow: hidden; } -#language-family-select, #search-bar, #tokenizer-filter { +#language-family-select, #search-bar, #tokenizer-filter, #script-filter { /*position: absolute;*/ /*top: 10px;*/ background: white; - border: 1px solid #ddd; padding: 5px; z-index: 10; } -#tokenizer-filter { +#tokenizer-filter, #script-filter { max-height: 200px; overflow-y: auto; + max-width: 200px; + float:left; + padding-right: 20px; } #hover-box { position: absolute; @@ -49,4 +51,6 @@ body { position: absolute; top: 20px; left: 20px; + max-width: 400px; + border: 1px solid #ddd; } \ No newline at end of file