lbourdois committed on
Commit
871c434
·
1 Parent(s): 4c735bc

Update language

Browse files

Following the discussion on Slack (https://huggingface.slack.com/archives/C039P47V1L5/p1686049660301789?thread_ts=1686041695.444479&cid=C039P47V1L5), it seems that the correct languages are those of this PR rather than the others. The README would then also need to be modified, since 208 languages would be taken into account and not 157 as currently indicated.

Files changed (1) hide show
  1. README.md +117 -75
README.md CHANGED
@@ -5,163 +5,205 @@ tags:
5
  - text-classification
6
  - language-identification
7
  language:
 
 
 
 
8
  - af
 
 
9
  - als
10
  - am
11
- - an
12
  - ar
 
 
13
  - arz
14
  - as
15
  - ast
16
- - az
 
17
  - azb
 
18
  - ba
19
- - bar
20
- - bcl
21
  - be
22
- - bg
23
- - bh
24
  - bn
 
 
25
  - bo
26
- - bpy
27
- - br
28
  - bs
 
 
29
  - ca
30
- - ce
31
  - ceb
32
- - ckb
33
- - co
34
  - cs
35
- - cv
 
 
36
  - cy
37
  - da
38
  - de
39
- - diq
40
- - dv
 
41
  - el
42
- - eml
43
  - en
44
  - eo
45
- - es
46
  - et
47
  - eu
48
- - fa
 
 
49
  - fi
 
50
  - fr
51
- - frr
52
- - fy
53
- - ga
54
  - gd
 
55
  - gl
56
- - gom
57
  - gu
58
- - gv
 
59
  - he
60
  - hi
61
- - hif
62
  - hr
63
- - hsb
64
  - hu
65
  - hy
66
- - ia
67
- - id
68
  - ilo
69
- - io
70
  - is
71
  - it
72
- - ja
73
  - jv
 
 
 
 
 
 
74
  - ka
75
  - kk
 
 
 
76
  - km
77
- - kn
78
- - ku
79
  - ky
80
- - la
81
- - lb
 
 
 
 
 
82
  - li
83
- - lmo
84
  - lt
85
- - lv
 
 
 
 
 
 
 
 
86
  - mai
87
- - mg
88
- - mhr
89
  - min
90
  - mk
91
- - ml
92
- - mn
93
- - mr
94
- - mrj
95
- - ms
96
  - mt
97
- - mwf
 
 
98
  - my
99
- - myv
100
- - mzn
101
- - nah
102
- - nap
103
- - nds
104
- - ne
105
- - new
106
  - nl
107
  - nn
108
- - no
 
109
  - nso
 
 
110
  - oc
111
- - or
112
- - os
113
  - pa
114
- - pam
115
- - pfl
 
 
116
  - pl
117
- - pms
118
- - pnb
119
- - ps
120
  - pt
121
- - qu
122
- - rm
123
  - ro
 
124
  - ru
 
125
  - sa
126
- - sah
127
- - sc
128
  - scn
129
- - sco
130
- - sd
131
- - sh
132
  - si
133
  - sk
134
  - sl
 
 
 
135
  - so
136
- - sq
 
 
137
  - sr
 
138
  - su
139
  - sv
140
- - sw
 
141
  - ta
 
 
142
  - te
143
  - tg
 
144
  - th
 
 
 
 
145
  - tk
146
- - tl
147
  - tr
148
- - tt
 
149
  - ug
150
  - uk
 
151
  - ur
152
- - uz
153
  - vec
154
  - vi
155
- - vls
156
- - vo
157
- - wa
158
  - war
159
- - xmf
160
- - yi
 
161
  - yo
162
- - zea
163
  - zh
 
 
164
  - multilingual
 
 
165
  ---
166
 
167
  # fastText (Language Identification)
 
5
  - text-classification
6
  - language-identification
7
  language:
8
+ - ace
9
+ - acm
10
+ - acq
11
+ - aeb
12
  - af
13
+ - ajp
14
+ - ak
15
  - als
16
  - am
17
+ - apc
18
  - ar
19
+ - ars
20
+ - ary
21
  - arz
22
  - as
23
  - ast
24
+ - awa
25
+ - ayr
26
  - azb
27
+ - azj
28
  - ba
29
+ - bm
30
+ - ban
31
  - be
32
+ - bem
 
33
  - bn
34
+ - bho
35
+ - bjn
36
  - bo
 
 
37
  - bs
38
+ - bug
39
+ - bg
40
  - ca
 
41
  - ceb
 
 
42
  - cs
43
+ - cjk
44
+ - ckb
45
+ - crh
46
  - cy
47
  - da
48
  - de
49
+ - dik
50
+ - dyu
51
+ - dz
52
  - el
 
53
  - en
54
  - eo
 
55
  - et
56
  - eu
57
+ - ee
58
+ - fo
59
+ - fj
60
  - fi
61
+ - fon
62
  - fr
63
+ - fur
64
+ - fuv
65
+ - gaz
66
  - gd
67
+ - ga
68
  - gl
69
+ - gn
70
  - gu
71
+ - ht
72
+ - ha
73
  - he
74
  - hi
75
+ - hne
76
  - hr
 
77
  - hu
78
  - hy
79
+ - ig
 
80
  - ilo
81
+ - id
82
  - is
83
  - it
 
84
  - jv
85
+ - ja
86
+ - kab
87
+ - kac
88
+ - kam
89
+ - kn
90
+ - ks
91
  - ka
92
  - kk
93
+ - kbp
94
+ - kea
95
+ - khk
96
  - km
97
+ - ki
98
+ - rw
99
  - ky
100
+ - kmb
101
+ - kmr
102
+ - knc
103
+ - kg
104
+ - ko
105
+ - lo
106
+ - lij
107
  - li
108
+ - ln
109
  - lt
110
+ - lmo
111
+ - ltg
112
+ - lb
113
+ - lua
114
+ - lg
115
+ - luo
116
+ - lus
117
+ - lvs
118
+ - mag
119
  - mai
120
+ - ml
121
+ - mar
122
  - min
123
  - mk
 
 
 
 
 
124
  - mt
125
+ - mni
126
+ - mos
127
+ - mi
128
  - my
 
 
 
 
 
 
 
129
  - nl
130
  - nn
131
+ - nb
132
+ - npi
133
  - nso
134
+ - nus
135
+ - ny
136
  - oc
137
+ - ory
138
+ - pag
139
  - pa
140
+ - pap
141
+ - pbt
142
+ - pes
143
+ - plt
144
  - pl
 
 
 
145
  - pt
146
+ - prs
147
+ - quy
148
  - ro
149
+ - rn
150
  - ru
151
+ - sg
152
  - sa
153
+ - sat
 
154
  - scn
155
+ - shn
 
 
156
  - si
157
  - sk
158
  - sl
159
+ - sm
160
+ - sn
161
+ - sd
162
  - so
163
+ - st
164
+ - es
165
+ - sc
166
  - sr
167
+ - ss
168
  - su
169
  - sv
170
+ - swh
171
+ - szl
172
  - ta
173
+ - taq
174
+ - tt
175
  - te
176
  - tg
177
+ - tl
178
  - th
179
+ - ti
180
+ - tpi
181
+ - tn
182
+ - ts
183
  - tk
184
+ - tum
185
  - tr
186
+ - tw
187
+ - tzm
188
  - ug
189
  - uk
190
+ - umb
191
  - ur
192
+ - uzn
193
  - vec
194
  - vi
 
 
 
195
  - war
196
+ - wo
197
+ - xh
198
+ - ydd
199
  - yo
200
+ - yue
201
  - zh
202
+ - zsm
203
+ - zu
204
  - multilingual
205
+
206
+ language_details: "ace_Arab, ace_Latn, acm_Arab, acq_Arab, aeb_Arab, afr_Latn, ajp_Arab, aka_Latn, amh_Ethi, apc_Arab, arb_Arab, ars_Arab, ary_Arab, arz_Arab, asm_Beng, ast_Latn, awa_Deva, ayr_Latn, azb_Arab, azj_Latn, bak_Cyrl, bam_Latn, ban_Latn, bel_Cyrl, bem_Latn, ben_Beng, bho_Deva, bjn_Arab, bjn_Latn, bod_Tibt, bos_Latn, bug_Latn, bul_Cyrl, cat_Latn, ceb_Latn, ces_Latn, cjk_Latn, ckb_Arab, crh_Latn, cym_Latn, dan_Latn, deu_Latn, dik_Latn, dyu_Latn, dzo_Tibt, ell_Grek, eng_Latn, epo_Latn, est_Latn, eus_Latn, ewe_Latn, fao_Latn, pes_Arab, fij_Latn, fin_Latn, fon_Latn, fra_Latn, fur_Latn, fuv_Latn, gla_Latn, gle_Latn, glg_Latn, grn_Latn, guj_Gujr, hat_Latn, hau_Latn, heb_Hebr, hin_Deva, hne_Deva, hrv_Latn, hun_Latn, hye_Armn, ibo_Latn, ilo_Latn, ind_Latn, isl_Latn, ita_Latn, jav_Latn, jpn_Jpan, kab_Latn, kac_Latn, kam_Latn, kan_Knda, kas_Arab, kas_Deva, kat_Geor, knc_Arab, knc_Latn, kaz_Cyrl, kbp_Latn, kea_Latn, khm_Khmr, kik_Latn, kin_Latn, kir_Cyrl, kmb_Latn, kon_Latn, kor_Hang, kmr_Latn, lao_Laoo, lvs_Latn, lij_Latn, lim_Latn, lin_Latn, lit_Latn, lmo_Latn, ltg_Latn, ltz_Latn, lua_Latn, lug_Latn, luo_Latn, lus_Latn, mag_Deva, mai_Deva, mal_Mlym, mar_Deva, min_Latn, mkd_Cyrl, plt_Latn, mlt_Latn, mni_Beng, khk_Cyrl, mos_Latn, mri_Latn, zsm_Latn, mya_Mymr, nld_Latn, nno_Latn, nob_Latn, npi_Deva, nso_Latn, nus_Latn, nya_Latn, oci_Latn, gaz_Latn, ory_Orya, pag_Latn, pan_Guru, pap_Latn, pol_Latn, por_Latn, prs_Arab, pbt_Arab, quy_Latn, ron_Latn, run_Latn, rus_Cyrl, sag_Latn, san_Deva, sat_Beng, scn_Latn, shn_Mymr, sin_Sinh, slk_Latn, slv_Latn, smo_Latn, sna_Latn, snd_Arab, som_Latn, sot_Latn, spa_Latn, als_Latn, srd_Latn, srp_Cyrl, ssw_Latn, sun_Latn, swe_Latn, swh_Latn, szl_Latn, tam_Taml, tat_Cyrl, tel_Telu, tgk_Cyrl, tgl_Latn, tha_Thai, tir_Ethi, taq_Latn, taq_Tfng, tpi_Latn, tsn_Latn, tso_Latn, tuk_Latn, tum_Latn, tur_Latn, twi_Latn, tzm_Tfng, uig_Arab, ukr_Cyrl, umb_Latn, urd_Arab, uzn_Latn, vec_Latn, vie_Latn, war_Latn, wol_Latn, xho_Latn, ydd_Hebr, yor_Latn, 
yue_Hant, zho_Hans, zho_Hant, zul_Latn"
207
  ---
208
 
209
  # fastText (Language Identification)