# language-metric-analysis / constants.py
# mshamrai -- chore: init demo (f9b063b)
# Maps each language display name to the label of its genealogical subgroup
# (a finer-grained grouping than the one in a top-level family table).
# Insertion order is kept stable on purpose: callers that iterate this dict
# see the languages in the order listed here.
#
# NOTE(review): a few labels are the language's own name rather than a wider
# subgroup ("Gujarati", "Punjabi", "Austro-Bavarian"); they are left as-is --
# confirm whether that granularity is intended.
language_subfamilies = {
    "Afrikaans": "West Germanic",
    "Albanian": "Albanian",
    "Arabic": "Semitic",
    "Egyptian Arabic": "Semitic",
    "Aragonese": "Romance",
    "Armenian": "Armenian",
    "Asturian": "Romance",
    "Azerbaijani": "Oghuz",
    "Bashkir": "Kypchak",
    "Basque": "Language Isolate",
    "Bavarian": "Austro-Bavarian",
    "Belarusian": "East Slavic",
    "Bengali": "Eastern Indo-Aryan",
    "Bishnupriya Manipuri": "Eastern Indo-Aryan",
    "Bosnian": "South Slavic",
    "Breton": "Brythonic",
    "Bulgarian": "South Slavic",
    "Burmese": "Burmish",
    "Catalan": "Romance",
    "Cebuano": "Central Philippine",
    "Chechen": "Nakh-Daghestanian",
    "Chinese (Simplified)": "Sinitic",
    "Chinese (Traditional)": "Sinitic",
    "Min Nan Chinese": "Sinitic",
    "Chuvash": "Oghur",
    "Croatian": "South Slavic",
    "Czech": "West Slavic",
    "Danish": "North Germanic",
    "Dutch": "West Germanic",
    "English": "West Germanic",
    "Estonian": "Finnic",
    "Finnish": "Finnic",
    "French": "Gallo-Romance",
    # Galician is West Iberian (Galician-Portuguese), so it shares
    # Portuguese's label rather than French's.
    "Galician": "Iberian Romance",
    "Georgian": "Kartvelian",
    "German": "West Germanic",
    "Greek": "Hellenic",
    "Gujarati": "Gujarati",
    "Haitian": "French-based Creole",
    "Hebrew": "Semitic",
    "Hindi": "Central Indo-Aryan",
    "Hungarian": "Ugric",
    "Icelandic": "North Germanic",
    "Ido": "Constructed",
    "Indonesian": "Malayic",
    "Irish": "Goidelic",
    "Italian": "Italo-Dalmatian",
    "Japanese": "Japonic",
    "Javanese": "Javanic",
    "Kannada": "Southern Dravidian",
    "Kazakh": "Kypchak",
    "Kirghiz": "Kypchak",
    "Korean": "Koreanic",
    "Latin": "Italic",
    "Latvian": "Baltic",
    "Lithuanian": "Baltic",
    "Lombard": "Gallo-Italic",
    "Low Saxon": "West Germanic",
    "Luxembourgish": "West Germanic",
    "Macedonian": "South Slavic",
    # Malagasy is an East Barito language; it is not part of Malayic.
    "Malagasy": "East Barito",
    "Malay": "Malayic",
    "Malayalam": "Southern Dravidian",
    # Marathi belongs to the Southern zone of Indo-Aryan, unlike Hindi/Urdu.
    "Marathi": "Southern Indo-Aryan",
    "Minangkabau": "Malayic",
    # Nepali is a Pahari language, i.e. Northern (not Eastern) Indo-Aryan.
    "Nepali": "Northern Indo-Aryan",
    "Newar": "Newaric",
    "Norwegian (Bokmal)": "North Germanic",
    "Norwegian (Nynorsk)": "North Germanic",
    "Occitan": "Gallo-Romance",
    "Persian (Farsi)": "Iranian",
    "Piedmontese": "Gallo-Italic",
    "Polish": "West Slavic",
    "Portuguese": "Iberian Romance",
    "Punjabi": "Punjabi",
    "Romanian": "Eastern Romance",
    "Russian": "East Slavic",
    "Scots": "West Germanic",
    "Serbian": "South Slavic",
    "Serbo-Croatian": "South Slavic",
    "Sicilian": "Italo-Dalmatian",
    "Slovak": "West Slavic",
    "Slovenian": "South Slavic",
    "South Azerbaijani": "Oghuz",
    "Spanish": "Iberian Romance",
    "Sundanese": "Sundic",
    "Swahili": "Bantu",
    "Swedish": "North Germanic",
    "Tagalog": "Central Philippine",
    "Tajik": "Iranian",
    "Tamil": "Southern Dravidian",
    "Tatar": "Kypchak",
    "Telugu": "Southern Dravidian",
    "Turkish": "Oghuz",
    "Ukrainian": "East Slavic",
    "Urdu": "Central Indo-Aryan",
    "Uzbek": "Karluk",
    "Vietnamese": "Vietic",
    "Volapük": "Constructed",
    "Waray-Waray": "Central Philippine",
    "Welsh": "Brythonic",
    "West Frisian": "West Germanic",
    "Western Punjabi": "Punjabi",
    "Yoruba": "Yoruboid",
    "Esperanto": "Constructed",
    "Crimean Tatar": "Kypchak",
}
# Coarse genealogical label for every language display name.
#
# NOTE(review): Germanic and Romance languages carry their branch name
# ("Germanic" / "Romance") while the remaining Indo-European languages are
# labelled "Indo-European". This looks like a deliberate grouping choice and is
# preserved verbatim -- confirm before normalising.
language_families = {
    "Afrikaans": "Germanic",
    "Albanian": "Indo-European",
    "Arabic": "Afroasiatic",
    "Egyptian Arabic": "Afroasiatic",
    "Aragonese": "Romance",
    "Armenian": "Indo-European",
    "Asturian": "Romance",
    "Azerbaijani": "Turkic",
    "Bashkir": "Turkic",
    "Basque": "Language Isolate",
    "Bavarian": "Germanic",
    "Belarusian": "Indo-European",
    "Bengali": "Indo-European",
    "Bishnupriya Manipuri": "Indo-European",
    "Bosnian": "Indo-European",
    "Breton": "Indo-European",
    "Bulgarian": "Indo-European",
    "Burmese": "Sino-Tibetan",
    "Catalan": "Romance",
    "Cebuano": "Austronesian",
    "Chechen": "Northeast Caucasian",
    "Chinese (Simplified)": "Sino-Tibetan",
    "Chinese (Traditional)": "Sino-Tibetan",
    "Min Nan Chinese": "Sino-Tibetan",
    "Chuvash": "Turkic",
    "Croatian": "Indo-European",
    "Czech": "Indo-European",
    "Danish": "Germanic",
    "Dutch": "Germanic",
    "English": "Germanic",
    "Estonian": "Uralic",
    "Finnish": "Uralic",
    "French": "Romance",
    "Galician": "Romance",
    "Georgian": "Kartvelian",
    "German": "Germanic",
    "Greek": "Indo-European",
    "Gujarati": "Indo-European",
    "Haitian": "Creole",
    "Hebrew": "Afroasiatic",
    "Hindi": "Indo-European",
    "Hungarian": "Uralic",
    "Icelandic": "Germanic",
    "Ido": "Constructed",
    "Indonesian": "Austronesian",
    "Irish": "Indo-European",
    "Italian": "Romance",
    "Japanese": "Japonic",
    "Javanese": "Austronesian",
    "Kannada": "Dravidian",
    "Kazakh": "Turkic",
    "Kirghiz": "Turkic",
    "Korean": "Koreanic",
    "Latin": "Indo-European",
    "Latvian": "Indo-European",
    "Lithuanian": "Indo-European",
    "Lombard": "Romance",
    "Low Saxon": "Germanic",
    "Luxembourgish": "Germanic",
    "Macedonian": "Indo-European",
    "Malagasy": "Austronesian",
    "Malay": "Austronesian",
    "Malayalam": "Dravidian",
    "Marathi": "Indo-European",
    "Minangkabau": "Austronesian",
    "Nepali": "Indo-European",
    "Newar": "Sino-Tibetan",
    "Norwegian (Bokmal)": "Germanic",
    "Norwegian (Nynorsk)": "Germanic",
    "Occitan": "Romance",
    "Persian (Farsi)": "Indo-European",
    "Piedmontese": "Romance",
    "Polish": "Indo-European",
    "Portuguese": "Romance",
    "Punjabi": "Indo-European",
    "Romanian": "Romance",
    "Russian": "Indo-European",
    "Scots": "Germanic",
    "Serbian": "Indo-European",
    "Serbo-Croatian": "Indo-European",
    "Sicilian": "Romance",
    "Slovak": "Indo-European",
    "Slovenian": "Indo-European",
    "South Azerbaijani": "Turkic",
    "Spanish": "Romance",
    "Sundanese": "Austronesian",
    "Swahili": "Niger-Congo",
    "Swedish": "Germanic",
    "Tagalog": "Austronesian",
    "Tajik": "Indo-European",
    "Tamil": "Dravidian",
    "Tatar": "Turkic",
    "Telugu": "Dravidian",
    "Turkish": "Turkic",
    "Ukrainian": "Indo-European",
    "Urdu": "Indo-European",
    "Uzbek": "Turkic",
    "Vietnamese": "Austroasiatic",
    "Volapük": "Constructed",
    "Waray-Waray": "Austronesian",
    "Welsh": "Indo-European",
    "West Frisian": "Germanic",
    "Western Punjabi": "Indo-European",
    "Yoruba": "Niger-Congo",
    "Esperanto": "Constructed",
    "Crimean Tatar": "Turkic",
}